diff --git a/latest/.buildinfo b/latest/.buildinfo
index e399b071ba..98392361a4 100644
--- a/latest/.buildinfo
+++ b/latest/.buildinfo
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 5c850ce0a6f2d0ce79a91d25fbeeb241
+config: 6d408ca198781361fe3feb19254966dc
tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/latest/_cpp_gen/executor.html b/latest/_cpp_gen/executor.html
index f1700a377d..df6fb636d1 100644
--- a/latest/_cpp_gen/executor.html
+++ b/latest/_cpp_gen/executor.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -12925,9 +12925,9 @@
diff --git a/latest/_cpp_gen/runtime.html b/latest/_cpp_gen/runtime.html
index a6ee809136..bf16716c4d 100644
--- a/latest/_cpp_gen/runtime.html
+++ b/latest/_cpp_gen/runtime.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -9589,24 +9589,21 @@ one more than decoding draft tokens for prediction from primary head
@@ -13770,9 +13708,9 @@ one more than decoding draft tokens for prediction from primary head
diff --git a/latest/_downloads/b509390ba70e52fabb10dbd9d15d5118/attention.py b/latest/_downloads/b509390ba70e52fabb10dbd9d15d5118/attention.py
index 32dcea9fff..cc9031bc28 100644
--- a/latest/_downloads/b509390ba70e52fabb10dbd9d15d5118/attention.py
+++ b/latest/_downloads/b509390ba70e52fabb10dbd9d15d5118/attention.py
@@ -1,11 +1,11 @@
import math
import weakref
-from enum import IntEnum
from typing import Optional, Union, cast
import torch
from torch import nn
+from tensorrt_llm.logger import logger
from tensorrt_llm.mapping import Mapping
from ..attention_backend import (AttentionInputType, AttentionMetadata,
@@ -23,15 +23,6 @@ from .rms_norm import RMSNorm
from .rotary_embedding import RotaryEmbedding
-class QkNormType(IntEnum):
- """
- The type of QK normalization.
- """
- none = 0 # No normalization applied to Q and K
- pre_rope = 1 # Apply normalization before Rope
- post_rope = 2 # Apply normalization after Rope
-
-
class Attention(nn.Module):
def __init__(
@@ -43,7 +34,7 @@ class Attention(nn.Module):
max_position_embeddings: int,
bias: bool,
pos_embd_params: Optional[PositionalEmbeddingParams] = None,
- qk_norm_type: QkNormType = QkNormType.none,
+ rope_fusion: Optional[bool] = None,
layer_idx: Optional[int] = None,
dtype: torch.dtype = None,
dense_bias: Optional[bool] = None,
@@ -60,14 +51,14 @@ class Attention(nn.Module):
num_key_value_heads (int): The number of key value heads.
max_position_embeddings (int): The maximum position embeddings.
bias (bool): Whether to use bias in the linear layers.
- pos_embd_params (PositionalEmbeddingParams): The positional embedding parameters.
- qk_norm_type (QkNormType): The type of QK normalization.
- layer_idx (int): The layer index.
+ pos_embd_params (Optional[PositionalEmbeddingParams]): The positional embedding parameters.
+ rope_fusion (Optional[bool]): Whether to fuse RoPE into the attention OP and skip applying unfused RoPE. If None, whether to fuse is decided by the capability of the attention backend.
+ layer_idx (Optional[int]): The layer index.
dtype (torch.dtype): The data type.
- dense_bias (bool): Whether to use bias in the output projection layer.
- config (ModelConfig): The model configuration.
+ dense_bias (Optional[bool]): Whether to use bias in the output projection layer.
+ config (Optional[ModelConfig]): The model configuration.
q_scaling (float): The scaling factor for the qk_scale. The definition is $O = softmax(QK^T * qk_scale) * V, qk_scale = 1 / (sqrt(head_dim) * q_scaling)$. The default value is 1.0.
- attention_chunk_size (int): See [Chunked Attention] below.
+ attention_chunk_size (Optional[int]): See [Chunked Attention] below.
"""
super().__init__()
self.layer_idx = layer_idx
@@ -81,7 +72,6 @@ class Attention(nn.Module):
self.num_key_value_groups = self.num_heads // self.num_key_value_heads
self.max_position_embeddings = max_position_embeddings
self.pos_embd_params = pos_embd_params
- self.qk_norm_type = qk_norm_type
self.dense_bias = dense_bias
self.q_scaling = q_scaling
@@ -169,14 +159,21 @@ class Attention(nn.Module):
self.o_lora = LoraLayer([LoraModuleType.ATTENTION_DENSE],
[self.hidden_size])
- # enable_rope_fusion: Whether to fuse RoPE into the attention OP.
+ # Whether to fuse RoPE into the attention OP.
# If true, RoPE will be applied in self.attn.forward.
# If false, RoPE will be applied in self.apply_rope.
- self.enable_rope_fusion = attn_cls.support_fused_rope(
- ) and self.qk_norm_type != QkNormType.post_rope
+ self.rope_fusion = rope_fusion
+ if self.rope_fusion and not attn_cls.support_fused_rope():
+ logger.warning(
+ "rope_fusion is true but the attention backend does not support it. Will disable rope_fusion."
+ )
+ self.rope_fusion = False
+ # If rope_fusion is not specified, enable if the attention backend supports it.
+ if self.rope_fusion is None:
+ self.rope_fusion = attn_cls.support_fused_rope()
self.rotary_emb = None
- if not self.enable_rope_fusion and self.pos_embd_params is not None:
+ if not self.rope_fusion and self.pos_embd_params is not None:
self.rotary_emb = RotaryEmbedding(
self.pos_embd_params.rope,
head_dim=self.head_dim,
@@ -189,8 +186,7 @@ class Attention(nn.Module):
self.num_heads,
self.head_dim,
self.num_key_value_heads,
- pos_embd_params=self.pos_embd_params
- if self.enable_rope_fusion else None,
+ pos_embd_params=self.pos_embd_params if self.rope_fusion else None,
quant_config=self.quant_config,
skip_create_weights_in_init=config.skip_create_weights_in_init,
q_scaling=self.q_scaling,
@@ -198,6 +194,7 @@ class Attention(nn.Module):
)
self.support_fused_qkv = self.attn.support_fused_qkv()
+ self.support_nvfp4_output = self.attn.support_nvfp4_output()
if not config.skip_create_weights_in_init:
self.create_weights()
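Note on the rope_fusion resolution a few hunks above: an explicit True that the backend cannot honor is downgraded with a warning, and None defers to the backend capability. A minimal sketch of that decision (the helper name is illustrative, not part of the API; attn_cls stands in for the selected attention backend class):

    def _resolve_rope_fusion(rope_fusion, attn_cls):
        # An explicit request the backend cannot honor falls back to unfused RoPE.
        if rope_fusion and not attn_cls.support_fused_rope():
            return False
        # Unspecified (None) defers to the backend capability.
        if rope_fusion is None:
            return attn_cls.support_fused_rope()
        return rope_fusion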
@@ -222,7 +219,7 @@ class Attention(nn.Module):
def forward(
self,
- position_ids: Optional[torch.LongTensor],
+ position_ids: Optional[torch.IntTensor],
hidden_states: Union[torch.Tensor, Fp4QuantizedTensor],
attn_metadata: AttentionMetadata,
attention_mask: PredefinedAttentionMask = PredefinedAttentionMask.
@@ -237,7 +234,7 @@ class Attention(nn.Module):
Forward pass for the Attention module.
Args:
- position_ids (Optional[torch.LongTensor]): The position IDs.
+ position_ids (Optional[torch.IntTensor]): The position IDs.
hidden_states (torch.Tensor): The hidden states.
attn_metadata (AttentionMetadata): The attention metadata.
attention_mask (PredefinedAttentionMask): The attention mask type.
@@ -262,11 +259,16 @@ class Attention(nn.Module):
if qkv_lora is not None:
qkv = qkv + qkv_lora
- q, k, v = self.apply_rope(qkv, position_ids)
+ q, k, v = qkv, None, None
+
+ q, k, v = self.apply_rope(q, k, v, position_ids)
out_scale = None
+ out_scale_sf = None
if self.o_proj.has_fp8_qdq or self.o_proj.has_nvfp4 or self.o_proj.has_fp8_block_scales:
out_scale = self.o_proj.inv_input_scale
+ if self.o_proj.has_nvfp4 and self.support_nvfp4_output:
+ out_scale_sf = self.o_proj.input_scale
q, k, v = self.convert_qkv(q, k, v)
attn_output = self.attn.forward(
@@ -275,6 +277,7 @@ class Attention(nn.Module):
v,
attn_metadata,
out_scale=out_scale,
+ out_scale_sf=out_scale_sf,
attention_mask=attention_mask,
mrope_config=mrope_config,
attention_window_size=attention_window_size)
@@ -285,32 +288,25 @@ class Attention(nn.Module):
layer_idx=self.layer_idx)
return attn_output
- def apply_qk_norm(self, q, k):
- raise NotImplementedError(
- f"QK norm is not implemented for {self.__class__.__name__}."
- "Please override the `apply_qk_norm` method in the subclass.")
-
- def apply_rope(self, qkv: torch.Tensor, position_ids: torch.Tensor):
+ def apply_rope(self, q: torch.Tensor, k: Optional[torch.Tensor],
+ v: Optional[torch.Tensor], position_ids: torch.Tensor):
"""
- Apply RoPE to the query and key, possibly including QK norm.
+ Apply RoPE to the query and key.
+ Depending on the implementation, q, k, v could be either fused (q, k, v = concat(q, k, v), None, None) or unfused (none of q, k, v is None).
+ Before self.attn.forward, convert_qkv will be called to make sure that the format of (q, k, v) satisfies the requirement of self.attn.
+ This method could be overridden in the subclass, in which extra functionalities such as q_norm/k_norm could be added.
Args:
- qkv (torch.Tensor): The query, key, and value tensor.
+ q (torch.Tensor): The query tensor.
+ k (Optional[torch.Tensor]): The key tensor.
+ v (Optional[torch.Tensor]): The value tensor.
position_ids (torch.Tensor): The position IDs of each token for RoPE.
Returns:
tuple: A tuple of (q, k, v).
- This method could be overridden in the subclass, it is possible that k/v is None and q is the concatenated qkv tensor, up to the implementation.
- Before self.attn.forward, convert_qkv will be called to make sure that the format of (q, k, v) satisfies the requirement of self.attn.
"""
- q, k, v = qkv, None, None
- if self.qk_norm_type == QkNormType.pre_rope:
- q, k, v = self.split_qkv(q, k, v)
- q, k = self.apply_qk_norm(q, k)
- if not self.enable_rope_fusion and position_ids is not None:
- q, k, v = self.split_qkv(q, k, v)
+ q, k, v = self.split_qkv(q, k, v)
+ # If RoPE is fused into the attention OP, do not apply RoPE here.
+ if not self.rope_fusion and position_ids is not None:
q, k = self.rotary_emb(position_ids, [q, k])
- if self.qk_norm_type == QkNormType.post_rope:
- q, k = self.apply_qk_norm(q, k)
-
return q, k, v
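With QkNormType removed, per-head q/k normalization is now expected to live in a subclass override of apply_rope, as the docstring above suggests. A minimal sketch, assuming hypothetical self.q_norm / self.k_norm modules and that split_qkv passes already-split tensors through unchanged:

    class NormalizedAttention(Attention):
        def apply_rope(self, q, k, v, position_ids):
            # Split the fused qkv tensor so q and k can be normalized separately.
            q, k, v = self.split_qkv(q, k, v)
            q = self.q_norm(q)  # hypothetical RMSNorm applied before RoPE
            k = self.k_norm(k)
            # Reuse the base-class path, which applies unfused RoPE only when
            # rope_fusion is disabled.
            return super().apply_rope(q, k, v, position_ids)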
@@ -595,14 +591,14 @@ class MLA(nn.Module):
self.aux_stream = aux_stream
self.ln_events = [torch.cuda.Event(), torch.cuda.Event()]
- self.enable_rope_fusion = self.mha.support_fused_rope()
+ self.rope_fusion = self.mha.support_fused_rope()
self.support_fused_qkv = self.mha.support_fused_qkv()
self.rotary_emb = RotaryEmbedding(
pos_embd_params.rope,
head_dim=self.qk_rope_head_dim,
is_neox=pos_embd_params.is_neox,
)
- self.apply_rotary_emb = not self.enable_rope_fusion
+ self.apply_rotary_emb = not self.rope_fusion
if not config.skip_create_weights_in_init:
self.create_weights()
@@ -687,7 +683,7 @@ class MLA(nn.Module):
Forward pass for the MLA module.
Args:
- position_ids (Optional[torch.LongTensor]): The position IDs.
+ position_ids (Optional[torch.IntTensor]): The position IDs.
hidden_states (torch.Tensor): The hidden states.
attn_metadata (AttentionMetadata): The attention metadata.
all_reduce_params (Optional[AllReduceParams]): The all reduce parameters.
@@ -841,7 +837,7 @@ class MLA(nn.Module):
compressed_kv: torch.Tensor,
k_pe: torch.Tensor,
attn_metadata: AttentionMetadata,
- position_ids: Optional[torch.LongTensor] = None,
+ position_ids: Optional[torch.IntTensor] = None,
) -> torch.Tensor:
trtllm_attention = cast(TrtllmAttention, self.mha)
# split current q into q_nope and q_pe
@@ -949,7 +945,7 @@ class MLA(nn.Module):
k_pe: torch.Tensor,
attn_metadata: AttentionMetadata,
latent_cache: Optional[torch.Tensor] = None,
- position_ids: Optional[torch.LongTensor] = None,
+ position_ids: Optional[torch.IntTensor] = None,
) -> torch.Tensor:
if isinstance(self.mha, TrtllmAttention):
assert isinstance(attn_metadata, TrtllmAttentionMetadata)
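Since position_ids is now annotated as Optional[torch.IntTensor] rather than torch.LongTensor throughout this module, callers should build int32 position ids; a minimal illustration (shape and device are assumptions):

    import torch

    seq_len = 16
    position_ids = torch.arange(seq_len, dtype=torch.int32,
                                device="cuda").unsqueeze(0)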
diff --git a/latest/_downloads/cba6509356738d5d6b4dcb3b7f52cf39/llm_args.py b/latest/_downloads/cba6509356738d5d6b4dcb3b7f52cf39/llm_args.py
index 0835058eda..42a5bc282b 100644
--- a/latest/_downloads/cba6509356738d5d6b4dcb3b7f52cf39/llm_args.py
+++ b/latest/_downloads/cba6509356738d5d6b4dcb3b7f52cf39/llm_args.py
@@ -42,7 +42,6 @@ from ..bindings.executor import (
PeftCacheConfig as _PeftCacheConfig,
SchedulerConfig as _SchedulerConfig) # isort: skip
# isort: on
-from transformers import PreTrainedTokenizerBase
# yapf: enable
from ..builder import BuildConfig, EngineConfig
@@ -1087,7 +1086,7 @@ class BaseLlmArgs(BaseModel):
self.speculative_model
) if self.speculative_model is not None else None
if model_obj.is_local_model and self.backend not in [
- 'pytorch', 'autodeploy'
+ 'pytorch', '_autodeploy'
]:
# Load parallel_config from the engine.
self.model_format = get_model_format(self.model)
@@ -1191,7 +1190,7 @@ class BaseLlmArgs(BaseModel):
self.build_config.max_draft_len = self.speculative_config.max_draft_len
- if self.backend != 'pytorch':
+ if self.backend not in ['pytorch', '_autodeploy']:
eagle_config = _EagleConfig(
self.speculative_config.eagle_choices,
self.speculative_config.greedy_sampling,
@@ -1211,7 +1210,7 @@ class BaseLlmArgs(BaseModel):
eagle3_one_model)
elif isinstance(self.speculative_config, NGramDecodingConfig):
self.build_config.speculative_decoding_mode = SpeculativeDecodingMode.NGRAM
- assert self.backend == 'pytorch'
+ assert self.backend in ['pytorch', '_autodeploy']
assert self.speculative_config.prompt_lookup_num_tokens > 0 and self.speculative_config.max_matching_ngram_size > 0
self.build_config.max_draft_len = self.speculative_config.max_draft_len
from tensorrt_llm._torch.speculative import NGramConfig
@@ -1259,9 +1258,11 @@ class BaseLlmArgs(BaseModel):
"lora_dir is empty, so custom embedding or lm head will not be applied."
)
- if self.enable_lora and self.lora_config is not None and self.backend == 'pytorch':
+ if self.enable_lora and self.lora_config is not None and self.backend in [
+ 'pytorch', '_autodeploy'
+ ]:
logger.warning(
- "enable_lora is ignored when lora_config is provided for pytorch backend."
+ f"enable_lora is ignored when lora_config is provided for {self.backend} backend."
)
if self.lora_config is not None:
@@ -1634,11 +1635,6 @@ class TorchLlmArgs(BaseLlmArgs):
def get_pytorch_backend_config(self) -> "PyTorchConfig":
from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig
- # TODO: Remove this after the PyTorch backend is fully migrated to TorchLlmArgs from ExecutorConfig
- # Just a WAR to support the auto_deploy
- if self.auto_deploy_config is not None:
- return self.auto_deploy_config
-
return PyTorchConfig(
extra_resource_managers=self.extra_resource_managers,
use_cuda_graph=self.use_cuda_graph,
@@ -1718,7 +1714,7 @@ class TorchLlmArgs(BaseLlmArgs):
2. If cuda_graph_batch_sizes is not provided, it is generated based on cuda_graph_max_batch_size
3. If both are provided, cuda_graph_batch_sizes must match the generated values
"""
- if self.cuda_graph_batch_sizes is not None:
+ if self.cuda_graph_batch_sizes:
self.cuda_graph_batch_sizes = sorted(self.cuda_graph_batch_sizes)
if self.cuda_graph_max_batch_size != 0:
if self.cuda_graph_batch_sizes != self._generate_cuda_graph_batch_sizes(
@@ -1743,6 +1739,109 @@ class TorchLlmArgs(BaseLlmArgs):
return self
+class _AutoDeployLlmArgs(TorchLlmArgs):
+ """LLM arguments specifically for AutoDeploy backend.
+
+ This class extends TorchLlmArgs with AutoDeploy-specific configuration options.
+ AutoDeploy provides automatic deployment and optimization of language models
+ with various attention backends and optimization strategies.
+ """
+
+ model_factory: Literal[
+ "AutoModelForCausalLM", "AutoModelForImageTextToText"] = Field(
+ default="AutoModelForCausalLM",
+ description="The model factory to use for loading the model.",
+ )
+
+ model_kwargs: Dict[str, Any] = Field(
+ default_factory=dict,
+ description=
+ "Extra kwargs for the model config class to customize the model config. "
+ "These arguments take precedence over default values or config values in the model config "
+ "file. Arguments are resolved in order: 1) Default values in model config class, 2) Values "
+ "in model config file, 3) Values in model_kwargs. Note: if a kwarg doesn't exist in the "
+ "model config class, it will be ignored.",
+ )
+
+ mla_backend: Literal["MultiHeadLatentAttention"] = Field(
+ default="MultiHeadLatentAttention",
+ description="The Multi-Head Latent Attention backend to use.",
+ )
+
+ skip_loading_weights: bool = Field(
+ default=False,
+ description=
+ "Whether to skip loading model weights during initialization. "
+ "If True, only the model architecture is loaded.",
+ )
+
+ free_mem_ratio: float = Field(
+ default=0.8,
+ description="The fraction of available memory to allocate for cache. "
+ "Must be between 0.0 and 1.0.",
+ )
+
+ simple_shard_only: bool = Field(
+ default=False,
+ description=
+ "If True, force simple sharding (all_gather) in tensor parallelism. "
+ "If False, auto-detect and use column+row (all_reduce) sharding when possible.",
+ )
+
+ # TODO: Remove this field once tokens_per_block is properly passed through
+ attn_page_size: int = Field(
+ default=64,
+ description=
+ "Page size for attention (tokens_per_block). For TritonWithFlattenedInputs "
+ "backend, this should equal max_seq_len. Temporary field until tokens_per_block gets "
+ "properly passed through.",
+ )
+
+ @field_validator("free_mem_ratio")
+ @classmethod
+ def validate_free_mem_ratio(cls, v):
+ """Validate that free_mem_ratio is between 0.0 and 1.0."""
+ if not 0.0 <= v <= 1.0:
+ raise ValueError(
+ f"free_mem_ratio must be between 0.0 and 1.0, got {v}")
+ return v
+
+ @print_traceback_on_error
+ def model_post_init(self, __context):
+ # Modify default values that differ from TorchLlmArgs
+ new_defaults = {
+ "max_batch_size": 8,
+ "max_seq_len": 512,
+ "attn_backend": "FlashInfer",
+ # TODO: Remove this when overlap scheduler is supported (https://github.com/NVIDIA/TensorRT-LLM/issues/4364)
+ "disable_overlap_scheduler": True,
+ }
+ for k, v_default in new_defaults.items():
+ if k not in self.__pydantic_fields_set__:
+ setattr(self, k, v_default)
+
+ # NOTE: Only call super() after setting the default values since default values should be
+ # set first.
+ super().model_post_init(__context)
+
+ # Handle attn_page_size for TritonWithFlattenedInputs backend
+ if self.attn_backend == "TritonWithFlattenedInputs":
+ self.attn_page_size = self.max_seq_len
+
+ # Add max_position_embeddings to model_kwargs
+ # TODO (lucaslie): this is more HF specific than a generic model_kwargs. Ideally, we can
+ # move this to the HF model factory but we don't have access to max_seq_len there right now.
+ self.model_kwargs["max_position_embeddings"] = min(
+ self.max_seq_len,
+ self.model_kwargs.get("max_position_embeddings", self.max_seq_len),
+ )
+
+ # TODO: Remove this after the PyTorch backend is fully migrated to TorchLlmArgs from ExecutorConfig
+ def get_pytorch_backend_config(self) -> "_AutoDeployLlmArgs":
+ """Return the _AutoDeployLlmArgs (self) object."""
+ return self
+
+
def update_llm_args_with_extra_dict(
llm_args: Dict,
llm_args_dict: Dict,
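A hedged usage sketch of the new class: per the llmapi/llm.html change later in this diff, passing backend='_autodeploy' to the LLM constructor routes keyword arguments through _AutoDeployLlmArgs, so the AutoDeploy-specific fields can be supplied directly (the model path and values below are placeholders, not recommended settings):

    from tensorrt_llm import LLM

    llm = LLM(
        model="/path/to/hf_checkpoint",   # placeholder
        backend="_autodeploy",            # selects _AutoDeployLlmArgs
        model_factory="AutoModelForCausalLM",
        free_mem_ratio=0.9,               # must lie in [0.0, 1.0]
        skip_loading_weights=False,
        attn_page_size=64,
    )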
diff --git a/latest/_modules/index.html b/latest/_modules/index.html
index d43b59af11..2809ca5be1 100644
--- a/latest/_modules/index.html
+++ b/latest/_modules/index.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -687,9 +687,9 @@
diff --git a/latest/_modules/tensorrt_llm/builder.html b/latest/_modules/tensorrt_llm/builder.html
index 7d244c86a4..9eb769ac2e 100644
--- a/latest/_modules/tensorrt_llm/builder.html
+++ b/latest/_modules/tensorrt_llm/builder.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1993,9 +1993,9 @@
diff --git a/latest/_modules/tensorrt_llm/disaggregated_params.html b/latest/_modules/tensorrt_llm/disaggregated_params.html
index d717c7998e..c5efe951c0 100644
--- a/latest/_modules/tensorrt_llm/disaggregated_params.html
+++ b/latest/_modules/tensorrt_llm/disaggregated_params.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -668,9 +668,9 @@
diff --git a/latest/_modules/tensorrt_llm/executor/result.html b/latest/_modules/tensorrt_llm/executor/result.html
index 5d2f55c78f..095a0eeac3 100644
--- a/latest/_modules/tensorrt_llm/executor/result.html
+++ b/latest/_modules/tensorrt_llm/executor/result.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -721,10 +721,6 @@
                else:
                    output.token_ids.extend(response_tensors.output_token_ids[src_idx])

-        # In PD, the first token should be ignored in streaming mode, since it's already been returned by the context server
-        if self.disaggregated_params is not None and self.disaggregated_params.request_type == "generation_only" and self._streaming and self.decoding_iter == 2:
-            output._last_token_ids_len = 1
-
        if response_tensors.cum_log_probs is not None:
            output.cumulative_logprob = response_tensors.cum_log_probs[src_idx]
@@ -1273,9 +1269,9 @@
diff --git a/latest/_modules/tensorrt_llm/executor/utils.html b/latest/_modules/tensorrt_llm/executor/utils.html
index eeced86c7f..92b9bd23ad 100644
--- a/latest/_modules/tensorrt_llm/executor/utils.html
+++ b/latest/_modules/tensorrt_llm/executor/utils.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -519,7 +519,6 @@
from tensorrt_llm._utils import mpi_rank
from tensorrt_llm.bindings.executor import Response
from tensorrt_llm.llmapi.utils import print_colored_debug
-from tensorrt_llm.logger import logger

from ..llmapi.mpi_session import (MpiCommSession, MpiPoolSession, MpiSession,
                                  RemoteMpiCommSessionClient)
@@ -536,10 +535,6 @@
    TLLM_EXECUTOR_PERIODICAL_RESP_IN_AWAIT = "TLLM_EXECUTOR_PERIODICAL_RESP_IN_AWAIT"

-PERIODICAL_RESP_IN_AWAIT = os.getenv(
-    LlmLauncherEnvs.TLLM_EXECUTOR_PERIODICAL_RESP_IN_AWAIT) == "1"
-
-
def get_spawn_proxy_process_ipc_addr_env() -> str | None:
    ''' Get the IPC address for the spawn proxy process dynamically. '''
    return os.getenv(LlmLauncherEnvs.TLLM_SPAWN_PROXY_PROCESS_IPC_ADDR)
@@ -556,10 +551,6 @@
    return os.getenv(LlmLauncherEnvs.TLLM_SPAWN_PROXY_PROCESS) == "1"

-if PERIODICAL_RESP_IN_AWAIT:
-    logger.info("Using periodical responses in await_responses")
-
-
def create_mpi_comm_session(
        n_workers: int) -> RemoteMpiCommSessionClient | MpiPoolSession:
    assert mpi_rank(
@@ -658,7 +649,7 @@
class WorkerCommIpcAddrs(NamedTuple):
    ''' IPC addresses (str) and HMAC keys (bytes) for communication with the worker processes. '''
    request_queue_addr: tuple[str, Optional[bytes]]
-    request_error_queue_addr: tuple[str, Optional[bytes]]
+    worker_init_status_queue_addr: tuple[str, Optional[bytes]]
    result_queue_addr: tuple[str, Optional[bytes]]
    stats_queue_addr: tuple[str, Optional[bytes]]
    kv_cache_events_queue_addr: tuple[str, Optional[bytes]]
@@ -781,9 +772,9 @@
diff --git a/latest/_modules/tensorrt_llm/functional.html b/latest/_modules/tensorrt_llm/functional.html
index e9badfb41b..8988c0cabd 100644
--- a/latest/_modules/tensorrt_llm/functional.html
+++ b/latest/_modules/tensorrt_llm/functional.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -8681,9 +8681,9 @@
diff --git a/latest/_modules/tensorrt_llm/layers/activation.html b/latest/_modules/tensorrt_llm/layers/activation.html
index 6f42b49a0b..1bfdce8ca5 100644
--- a/latest/_modules/tensorrt_llm/layers/activation.html
+++ b/latest/_modules/tensorrt_llm/layers/activation.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -646,9 +646,9 @@
diff --git a/latest/_modules/tensorrt_llm/layers/attention.html b/latest/_modules/tensorrt_llm/layers/attention.html
index d1d370eaf2..8287f7efcf 100644
--- a/latest/_modules/tensorrt_llm/layers/attention.html
+++ b/latest/_modules/tensorrt_llm/layers/attention.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -3511,9 +3511,9 @@
diff --git a/latest/_modules/tensorrt_llm/layers/cast.html b/latest/_modules/tensorrt_llm/layers/cast.html
index 8a50d31b0e..ccd71c5ece 100644
--- a/latest/_modules/tensorrt_llm/layers/cast.html
+++ b/latest/_modules/tensorrt_llm/layers/cast.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -653,9 +653,9 @@
diff --git a/latest/_modules/tensorrt_llm/layers/conv.html b/latest/_modules/tensorrt_llm/layers/conv.html
index 83fc9ea691..6ddea3d38c 100644
--- a/latest/_modules/tensorrt_llm/layers/conv.html
+++ b/latest/_modules/tensorrt_llm/layers/conv.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -902,9 +902,9 @@
diff --git a/latest/_modules/tensorrt_llm/layers/embedding.html b/latest/_modules/tensorrt_llm/layers/embedding.html
index aacfd70035..c883945de8 100644
--- a/latest/_modules/tensorrt_llm/layers/embedding.html
+++ b/latest/_modules/tensorrt_llm/layers/embedding.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1369,9 +1369,9 @@
diff --git a/latest/_modules/tensorrt_llm/layers/linear.html b/latest/_modules/tensorrt_llm/layers/linear.html
index f399188379..c2bb0d63d5 100644
--- a/latest/_modules/tensorrt_llm/layers/linear.html
+++ b/latest/_modules/tensorrt_llm/layers/linear.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1217,9 +1217,9 @@
diff --git a/latest/_modules/tensorrt_llm/layers/mlp.html b/latest/_modules/tensorrt_llm/layers/mlp.html
index e5bfd99f21..64b52880fe 100644
--- a/latest/_modules/tensorrt_llm/layers/mlp.html
+++ b/latest/_modules/tensorrt_llm/layers/mlp.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1243,9 +1243,9 @@
diff --git a/latest/_modules/tensorrt_llm/layers/normalization.html b/latest/_modules/tensorrt_llm/layers/normalization.html
index 39cca5e8ac..175ba76108 100644
--- a/latest/_modules/tensorrt_llm/layers/normalization.html
+++ b/latest/_modules/tensorrt_llm/layers/normalization.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1007,9 +1007,9 @@
diff --git a/latest/_modules/tensorrt_llm/layers/pooling.html b/latest/_modules/tensorrt_llm/layers/pooling.html
index 3b9b232be7..61b50b28fc 100644
--- a/latest/_modules/tensorrt_llm/layers/pooling.html
+++ b/latest/_modules/tensorrt_llm/layers/pooling.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -662,9 +662,9 @@
diff --git a/latest/_modules/tensorrt_llm/llmapi/build_cache.html b/latest/_modules/tensorrt_llm/llmapi/build_cache.html
index 211ec0ce6a..764852987d 100644
--- a/latest/_modules/tensorrt_llm/llmapi/build_cache.html
+++ b/latest/_modules/tensorrt_llm/llmapi/build_cache.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -946,9 +946,9 @@
diff --git a/latest/_modules/tensorrt_llm/llmapi/llm.html b/latest/_modules/tensorrt_llm/llmapi/llm.html
index 9f22875735..45e38c1767 100644
--- a/latest/_modules/tensorrt_llm/llmapi/llm.html
+++ b/latest/_modules/tensorrt_llm/llmapi/llm.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -511,7 +511,9 @@
import json
import os
import shutil
+import socket
import tempfile
+import time
import weakref
from pathlib import Path
from typing import Any, List, Literal, Optional, Sequence, Union
@@ -538,7 +540,7 @@
from ..logger import logger
from ..sampling_params import SamplingParams
from .llm_args import (LLMARGS_EXPLICIT_DOCSTRING, PybindMirror, TorchLlmArgs,
-                       TrtLlmArgs)
+                       TrtLlmArgs, _AutoDeployLlmArgs)
from .llm_utils import (CachedModelLoader, KvCacheRetentionConfig,
                        LlmBuildStats, ModelLoader, _ModelRuntimeContext)
from .mpi_session import MpiPoolSession, external_mpi_comm_available
@@ -601,6 +603,7 @@
    Attributes:
        tokenizer (tensorrt_llm.llmapi.tokenizer.TokenizerBase, optional): The tokenizer loaded by LLM instance, if any.
        workspace (pathlib.Path): The directory to store intermediate files.
+        llm_id (str): The unique ID of the LLM instance.
    """
@@ -629,10 +632,16 @@
                 **kwargs: Any) -> None:
        self._executor_cls = kwargs.pop("executor_cls", GenerationExecutor)
+        self._llm_id = None

        try:
-            llm_args_cls = TorchLlmArgs if kwargs.get(
-                'backend', None) == 'pytorch' else TrtLlmArgs
+            backend = kwargs.get('backend', None)
+            if backend == 'pytorch':
+                llm_args_cls = TorchLlmArgs
+            elif backend == '_autodeploy':
+                llm_args_cls = _AutoDeployLlmArgs
+            else:
+                llm_args_cls = TrtLlmArgs

            self.args = llm_args_cls.from_kwargs(
                model=model,
@@ -706,6 +715,16 @@
    def workspace(self) -> Path:
        return Path(self._workspace.name) if self._on_trt_backend else None

+    @property
+    def llm_id(self) -> str:
+        if self._llm_id is None:
+            hostname = socket.gethostname()
+            pid = os.getpid()
+            timestamp = int(time.time() * 1000)
+            self._llm_id = f"{hostname}-{pid}-{timestamp}"
+
+        return self._llm_id
+
    def generate(
@@ -989,7 +1008,7 @@
        )
        sampling_params._setup(self.tokenizer)
        # auto enabled context and/or generation logits flags, as they are required by logprob computation for TRT backend.
-        if self.args.backend not in ["pytorch", "autodeploy"]:
+        if self.args.backend not in ["pytorch", "_autodeploy"]:
            if sampling_params.prompt_logprobs and not sampling_params.return_context_logits:
                sampling_params.return_context_logits = True
                sampling_params._context_logits_auto_enabled = True
@@ -1006,7 +1025,7 @@
    def _check_arguments(self, prompt_len: int, query_len: int,
                         sampling_params: SamplingParams) -> None:
-        if self.args.backend == "pytorch":
+        if self.args.backend in ["pytorch", "_autodeploy"]:
            # TODO: remove these checks after PyTorch backend
            # fully support TopK prompt and generation logprobs.
            if sampling_params.prompt_logprobs:
@@ -1018,7 +1037,7 @@
f"PyTorch backend currently only supports `logprobs=1`. Received `logprobs={sampling_params.logprobs}` (Top{sampling_params.logprobs} logprobs). Please set `logprobs=1` in `sampling_params` instead.")return
- elifself.args.backend=="autodeploy":
+ elifself.args.backend=="_autodeploy":returnbuild_config=self.args.build_config
@@ -1171,7 +1190,7 @@
            executor_config,
            backend=self.args.backend,
            pytorch_backend_config=self.args.get_pytorch_backend_config()
-            if self.args.backend == "pytorch" else None,
+            if self.args.backend in ["pytorch", "_autodeploy"] else None,
            mapping=self.args.parallel_config.to_mapping(),
            build_config=self.args.build_config if self._on_trt_backend else None,
@@ -1218,9 +1237,9 @@
        # TODO smor- need to refine what is the desired behavior if lora is enabled
        # in terms of the tokenizer initialization process
-        if hasattr(
-                self.args, "backend"
-        ) and self.args.backend == "pytorch" and self.args.lora_config is not None:
+        if hasattr(self.args, "backend") and self.args.backend in [
+                "pytorch", "_autodeploy"
+        ] and self.args.lora_config is not None:
            num_lora_dirs = len(self.args.lora_config.lora_dir)
            if num_lora_dirs == 1:
                tokenizer_path = self.args.lora_config.lora_dir[0]
@@ -1427,9 +1446,9 @@
diff --git a/latest/_modules/tensorrt_llm/llmapi/llm_args.html b/latest/_modules/tensorrt_llm/llmapi/llm_args.html
index 3e41a4e02b..c7e10cded3 100644
--- a/latest/_modules/tensorrt_llm/llmapi/llm_args.html
+++ b/latest/_modules/tensorrt_llm/llmapi/llm_args.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -551,7 +551,6 @@
    PeftCacheConfig as _PeftCacheConfig,
    SchedulerConfig as _SchedulerConfig)  # isort: skip
# isort: on
-from transformers import PreTrainedTokenizerBase
# yapf: enable
from ..builder import BuildConfig, EngineConfig
@@ -1668,7 +1667,7 @@
            self.speculative_model
        ) if self.speculative_model is not None else None
        if model_obj.is_local_model and self.backend not in [
-                'pytorch', 'autodeploy'
+                'pytorch', '_autodeploy'
        ]:
            # Load parallel_config from the engine.
            self.model_format = get_model_format(self.model)
@@ -1772,7 +1771,7 @@
            self.build_config.max_draft_len = self.speculative_config.max_draft_len

-            if self.backend != 'pytorch':
+            if self.backend not in ['pytorch', '_autodeploy']:
                eagle_config = _EagleConfig(
                    self.speculative_config.eagle_choices,
                    self.speculative_config.greedy_sampling,
@@ -1792,7 +1791,7 @@
                    eagle3_one_model)
        elif isinstance(self.speculative_config, NGramDecodingConfig):
            self.build_config.speculative_decoding_mode = SpeculativeDecodingMode.NGRAM
-            assert self.backend == 'pytorch'
+            assert self.backend in ['pytorch', '_autodeploy']
            assert self.speculative_config.prompt_lookup_num_tokens > 0 and self.speculative_config.max_matching_ngram_size > 0
            self.build_config.max_draft_len = self.speculative_config.max_draft_len
            from tensorrt_llm._torch.speculative import NGramConfig
@@ -1840,9 +1839,11 @@
"lora_dir is empty, so custom embedding or lm head will not be applied.")
- ifself.enable_loraandself.lora_configisnotNoneandself.backend=='pytorch':
+ ifself.enable_loraandself.lora_configisnotNoneandself.backendin[
+ 'pytorch','_autodeploy'
+ ]:logger.warning(
- "enable_lora is ignored when lora_config is provided for pytorch backend."
+ f"enable_lora is ignored when lora_config is provided for {self.backend} backend.")ifself.lora_configisnotNone:
@@ -2231,11 +2232,6 @@
    def get_pytorch_backend_config(self) -> "PyTorchConfig":
        from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig

-        # TODO: Remove this after the PyTorch backend is fully migrated to TorchLlmArgs from ExecutorConfig
-        # Just a WAR to support the auto_deploy
-        if self.auto_deploy_config is not None:
-            return self.auto_deploy_config
-
        return PyTorchConfig(
            extra_resource_managers=self.extra_resource_managers,
            use_cuda_graph=self.use_cuda_graph,
@@ -2321,7 +2317,7 @@
        2. If cuda_graph_batch_sizes is not provided, it is generated based on cuda_graph_max_batch_size
        3. If both are provided, cuda_graph_batch_sizes must match the generated values
        """
-        if self.cuda_graph_batch_sizes is not None:
+        if self.cuda_graph_batch_sizes:
            self.cuda_graph_batch_sizes = sorted(self.cuda_graph_batch_sizes)
            if self.cuda_graph_max_batch_size != 0:
                if self.cuda_graph_batch_sizes != self._generate_cuda_graph_batch_sizes(
@@ -2348,6 +2344,109 @@
+class _AutoDeployLlmArgs(TorchLlmArgs):
+    """LLM arguments specifically for AutoDeploy backend.
+
+    This class extends TorchLlmArgs with AutoDeploy-specific configuration options.
+    AutoDeploy provides automatic deployment and optimization of language models
+    with various attention backends and optimization strategies.
+    """
+
+    model_factory: Literal[
+        "AutoModelForCausalLM", "AutoModelForImageTextToText"] = Field(
+            default="AutoModelForCausalLM",
+            description="The model factory to use for loading the model.",
+        )
+
+    model_kwargs: Dict[str, Any] = Field(
+        default_factory=dict,
+        description=
+        "Extra kwargs for the model config class to customize the model config. "
+        "These arguments take precedence over default values or config values in the model config "
+        "file. Arguments are resolved in order: 1) Default values in model config class, 2) Values "
+        "in model config file, 3) Values in model_kwargs. Note: if a kwarg doesn't exist in the "
+        "model config class, it will be ignored.",
+    )
+
+    mla_backend: Literal["MultiHeadLatentAttention"] = Field(
+        default="MultiHeadLatentAttention",
+        description="The Multi-Head Latent Attention backend to use.",
+    )
+
+    skip_loading_weights: bool = Field(
+        default=False,
+        description=
+        "Whether to skip loading model weights during initialization. "
+        "If True, only the model architecture is loaded.",
+    )
+
+    free_mem_ratio: float = Field(
+        default=0.8,
+        description="The fraction of available memory to allocate for cache. "
+        "Must be between 0.0 and 1.0.",
+    )
+
+    simple_shard_only: bool = Field(
+        default=False,
+        description=
+        "If True, force simple sharding (all_gather) in tensor parallelism. "
+        "If False, auto-detect and use column+row (all_reduce) sharding when possible.",
+    )
+
+    # TODO: Remove this field once tokens_per_block is properly passed through
+    attn_page_size: int = Field(
+        default=64,
+        description=
+        "Page size for attention (tokens_per_block). For TritonWithFlattenedInputs "
+        "backend, this should equal max_seq_len. Temporary field until tokens_per_block gets "
+        "properly passed through.",
+    )
+
+    @field_validator("free_mem_ratio")
+    @classmethod
+    def validate_free_mem_ratio(cls, v):
+        """Validate that free_mem_ratio is between 0.0 and 1.0."""
+        if not 0.0 <= v <= 1.0:
+            raise ValueError(
+                f"free_mem_ratio must be between 0.0 and 1.0, got {v}")
+        return v
+
+    @print_traceback_on_error
+    def model_post_init(self, __context):
+        # Modify default values that differ from TorchLlmArgs
+        new_defaults = {
+            "max_batch_size": 8,
+            "max_seq_len": 512,
+            "attn_backend": "FlashInfer",
+            # TODO: Remove this when overlap scheduler is supported (https://github.com/NVIDIA/TensorRT-LLM/issues/4364)
+            "disable_overlap_scheduler": True,
+        }
+        for k, v_default in new_defaults.items():
+            if k not in self.__pydantic_fields_set__:
+                setattr(self, k, v_default)
+
+        # NOTE: Only call super() after setting the default values since default values should be
+        # set first.
+        super().model_post_init(__context)
+
+        # Handle attn_page_size for TritonWithFlattenedInputs backend
+        if self.attn_backend == "TritonWithFlattenedInputs":
+            self.attn_page_size = self.max_seq_len
+
+        # Add max_position_embeddings to model_kwargs
+        # TODO (lucaslie): this is more HF specific than a generic model_kwargs. Ideally, we can
+        # move this to the HF model factory but we don't have access to max_seq_len there right now.
+        self.model_kwargs["max_position_embeddings"] = min(
+            self.max_seq_len,
+            self.model_kwargs.get("max_position_embeddings", self.max_seq_len),
+        )
+
+    # TODO: Remove this after the PyTorch backend is fully migrated to TorchLlmArgs from ExecutorConfig
+    def get_pytorch_backend_config(self) -> "_AutoDeployLlmArgs":
+        """Return the _AutoDeployLlmArgs (self) object."""
+        return self
+
+
def update_llm_args_with_extra_dict(
        llm_args: Dict,
        llm_args_dict: Dict,
@@ -2530,9 +2629,9 @@
diff --git a/latest/_modules/tensorrt_llm/llmapi/mpi_session.html b/latest/_modules/tensorrt_llm/llmapi/mpi_session.html
index 22c585bdba..461af6cf26 100644
--- a/latest/_modules/tensorrt_llm/llmapi/mpi_session.html
+++ b/latest/_modules/tensorrt_llm/llmapi/mpi_session.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1151,9 +1151,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/baichuan/model.html b/latest/_modules/tensorrt_llm/models/baichuan/model.html
index d90b92f478..86f8ddefd8 100644
--- a/latest/_modules/tensorrt_llm/models/baichuan/model.html
+++ b/latest/_modules/tensorrt_llm/models/baichuan/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -880,9 +880,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/bert/model.html b/latest/_modules/tensorrt_llm/models/bert/model.html
index c02ec73408..11df15dc4a 100644
--- a/latest/_modules/tensorrt_llm/models/bert/model.html
+++ b/latest/_modules/tensorrt_llm/models/bert/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1184,9 +1184,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/bloom/model.html b/latest/_modules/tensorrt_llm/models/bloom/model.html
index 3f1e04df38..ff06cb6cac 100644
--- a/latest/_modules/tensorrt_llm/models/bloom/model.html
+++ b/latest/_modules/tensorrt_llm/models/bloom/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -792,9 +792,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/chatglm/config.html b/latest/_modules/tensorrt_llm/models/chatglm/config.html
index 82d441fa93..415a9bad20 100644
--- a/latest/_modules/tensorrt_llm/models/chatglm/config.html
+++ b/latest/_modules/tensorrt_llm/models/chatglm/config.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -809,9 +809,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/chatglm/model.html b/latest/_modules/tensorrt_llm/models/chatglm/model.html
index 079b8e1f18..d92b322f3c 100644
--- a/latest/_modules/tensorrt_llm/models/chatglm/model.html
+++ b/latest/_modules/tensorrt_llm/models/chatglm/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1008,9 +1008,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/clip/model.html b/latest/_modules/tensorrt_llm/models/clip/model.html
index 1a6b4656ce..fab1304486 100644
--- a/latest/_modules/tensorrt_llm/models/clip/model.html
+++ b/latest/_modules/tensorrt_llm/models/clip/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -837,9 +837,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/cogvlm/config.html b/latest/_modules/tensorrt_llm/models/cogvlm/config.html
index 725efe1916..85a3f530d5 100644
--- a/latest/_modules/tensorrt_llm/models/cogvlm/config.html
+++ b/latest/_modules/tensorrt_llm/models/cogvlm/config.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -668,9 +668,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/cogvlm/model.html b/latest/_modules/tensorrt_llm/models/cogvlm/model.html
index 5a72b28a35..909dbe40b5 100644
--- a/latest/_modules/tensorrt_llm/models/cogvlm/model.html
+++ b/latest/_modules/tensorrt_llm/models/cogvlm/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -921,9 +921,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/commandr/model.html b/latest/_modules/tensorrt_llm/models/commandr/model.html
index 344cbc789e..37788273d9 100644
--- a/latest/_modules/tensorrt_llm/models/commandr/model.html
+++ b/latest/_modules/tensorrt_llm/models/commandr/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -819,9 +819,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/dbrx/config.html b/latest/_modules/tensorrt_llm/models/dbrx/config.html
index 914d526e83..56b2c21980 100644
--- a/latest/_modules/tensorrt_llm/models/dbrx/config.html
+++ b/latest/_modules/tensorrt_llm/models/dbrx/config.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -683,9 +683,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/dbrx/model.html b/latest/_modules/tensorrt_llm/models/dbrx/model.html
index abecf36478..72790b59c6 100644
--- a/latest/_modules/tensorrt_llm/models/dbrx/model.html
+++ b/latest/_modules/tensorrt_llm/models/dbrx/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -809,9 +809,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/deepseek_v1/model.html b/latest/_modules/tensorrt_llm/models/deepseek_v1/model.html
index e1e4f5805c..d6222f5b3f 100644
--- a/latest/_modules/tensorrt_llm/models/deepseek_v1/model.html
+++ b/latest/_modules/tensorrt_llm/models/deepseek_v1/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -903,9 +903,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/deepseek_v2/model.html b/latest/_modules/tensorrt_llm/models/deepseek_v2/model.html
index 237dedb2d0..b5804a925f 100644
--- a/latest/_modules/tensorrt_llm/models/deepseek_v2/model.html
+++ b/latest/_modules/tensorrt_llm/models/deepseek_v2/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -985,9 +985,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/dit/model.html b/latest/_modules/tensorrt_llm/models/dit/model.html
index 2e62c27bf2..12659590c4 100644
--- a/latest/_modules/tensorrt_llm/models/dit/model.html
+++ b/latest/_modules/tensorrt_llm/models/dit/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1021,9 +1021,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/eagle/model.html b/latest/_modules/tensorrt_llm/models/eagle/model.html
index 8059e80fca..006c51a970 100644
--- a/latest/_modules/tensorrt_llm/models/eagle/model.html
+++ b/latest/_modules/tensorrt_llm/models/eagle/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1957,9 +1957,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/enc_dec/model.html b/latest/_modules/tensorrt_llm/models/enc_dec/model.html
index fac8df8da0..d1f15c0fe4 100644
--- a/latest/_modules/tensorrt_llm/models/enc_dec/model.html
+++ b/latest/_modules/tensorrt_llm/models/enc_dec/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -2862,9 +2862,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/falcon/config.html b/latest/_modules/tensorrt_llm/models/falcon/config.html
index 11d7eeb7ef..8dc9823266 100644
--- a/latest/_modules/tensorrt_llm/models/falcon/config.html
+++ b/latest/_modules/tensorrt_llm/models/falcon/config.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -744,9 +744,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/falcon/model.html b/latest/_modules/tensorrt_llm/models/falcon/model.html
index 85b1be8036..31f3f8dd7a 100644
--- a/latest/_modules/tensorrt_llm/models/falcon/model.html
+++ b/latest/_modules/tensorrt_llm/models/falcon/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -906,9 +906,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gemma/config.html b/latest/_modules/tensorrt_llm/models/gemma/config.html
index 961dd54eb6..fb6f1f3cdd 100644
--- a/latest/_modules/tensorrt_llm/models/gemma/config.html
+++ b/latest/_modules/tensorrt_llm/models/gemma/config.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -834,9 +834,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gemma/model.html b/latest/_modules/tensorrt_llm/models/gemma/model.html
index e80fa36866..7ccac91c80 100644
--- a/latest/_modules/tensorrt_llm/models/gemma/model.html
+++ b/latest/_modules/tensorrt_llm/models/gemma/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1026,9 +1026,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gpt/config.html b/latest/_modules/tensorrt_llm/models/gpt/config.html
index 20156f06cd..d6f46985a2 100644
--- a/latest/_modules/tensorrt_llm/models/gpt/config.html
+++ b/latest/_modules/tensorrt_llm/models/gpt/config.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -953,9 +953,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gpt/model.html b/latest/_modules/tensorrt_llm/models/gpt/model.html
index db4d419722..d3da8d1d08 100644
--- a/latest/_modules/tensorrt_llm/models/gpt/model.html
+++ b/latest/_modules/tensorrt_llm/models/gpt/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1062,9 +1062,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gptj/config.html b/latest/_modules/tensorrt_llm/models/gptj/config.html
index fa05554092..5555ef8050 100644
--- a/latest/_modules/tensorrt_llm/models/gptj/config.html
+++ b/latest/_modules/tensorrt_llm/models/gptj/config.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -682,9 +682,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gptj/model.html b/latest/_modules/tensorrt_llm/models/gptj/model.html
index 7082784cc5..76da10ab9d 100644
--- a/latest/_modules/tensorrt_llm/models/gptj/model.html
+++ b/latest/_modules/tensorrt_llm/models/gptj/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -834,9 +834,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gptneox/model.html b/latest/_modules/tensorrt_llm/models/gptneox/model.html
index 03c4ff698e..664578088d 100644
--- a/latest/_modules/tensorrt_llm/models/gptneox/model.html
+++ b/latest/_modules/tensorrt_llm/models/gptneox/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -774,9 +774,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/llama/config.html b/latest/_modules/tensorrt_llm/models/llama/config.html
index 8ec04ebf96..bc40035875 100644
--- a/latest/_modules/tensorrt_llm/models/llama/config.html
+++ b/latest/_modules/tensorrt_llm/models/llama/config.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -908,9 +908,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/llama/model.html b/latest/_modules/tensorrt_llm/models/llama/model.html
index e0986b01f8..92a9b1dc80 100644
--- a/latest/_modules/tensorrt_llm/models/llama/model.html
+++ b/latest/_modules/tensorrt_llm/models/llama/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1256,9 +1256,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/mamba/model.html b/latest/_modules/tensorrt_llm/models/mamba/model.html
index 0440f62009..3000da49f2 100644
--- a/latest/_modules/tensorrt_llm/models/mamba/model.html
+++ b/latest/_modules/tensorrt_llm/models/mamba/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1101,9 +1101,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/medusa/config.html b/latest/_modules/tensorrt_llm/models/medusa/config.html
index 8c12f3cecb..bf0037ebf5 100644
--- a/latest/_modules/tensorrt_llm/models/medusa/config.html
+++ b/latest/_modules/tensorrt_llm/models/medusa/config.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -741,9 +741,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/medusa/model.html b/latest/_modules/tensorrt_llm/models/medusa/model.html
index bc80024bbc..30d46f4f3a 100644
--- a/latest/_modules/tensorrt_llm/models/medusa/model.html
+++ b/latest/_modules/tensorrt_llm/models/medusa/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -891,9 +891,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/mllama/model.html b/latest/_modules/tensorrt_llm/models/mllama/model.html
index 5cbf6a0e7f..4bec8cb69d 100644
--- a/latest/_modules/tensorrt_llm/models/mllama/model.html
+++ b/latest/_modules/tensorrt_llm/models/mllama/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -2202,9 +2202,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/mmdit_sd3/model.html b/latest/_modules/tensorrt_llm/models/mmdit_sd3/model.html
index ce3902bdff..0bf197a2e3 100644
--- a/latest/_modules/tensorrt_llm/models/mmdit_sd3/model.html
+++ b/latest/_modules/tensorrt_llm/models/mmdit_sd3/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1268,9 +1268,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/modeling_utils.html b/latest/_modules/tensorrt_llm/models/modeling_utils.html
index 3ca5f1f7ea..9eb609b2e9 100644
--- a/latest/_modules/tensorrt_llm/models/modeling_utils.html
+++ b/latest/_modules/tensorrt_llm/models/modeling_utils.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -2663,9 +2663,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/mpt/model.html b/latest/_modules/tensorrt_llm/models/mpt/model.html
index 3f9a382258..9dbe1eb463 100644
--- a/latest/_modules/tensorrt_llm/models/mpt/model.html
+++ b/latest/_modules/tensorrt_llm/models/mpt/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -806,9 +806,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/multimodal_encoders/config.html b/latest/_modules/tensorrt_llm/models/multimodal_encoders/config.html
index 7bd34b393a..a49cf6000b 100644
--- a/latest/_modules/tensorrt_llm/models/multimodal_encoders/config.html
+++ b/latest/_modules/tensorrt_llm/models/multimodal_encoders/config.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -740,9 +740,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/multimodal_encoders/model.html b/latest/_modules/tensorrt_llm/models/multimodal_encoders/model.html
index 5fb65b6437..7ac4acc69d 100644
--- a/latest/_modules/tensorrt_llm/models/multimodal_encoders/model.html
+++ b/latest/_modules/tensorrt_llm/models/multimodal_encoders/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -808,9 +808,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/opt/model.html b/latest/_modules/tensorrt_llm/models/opt/model.html
index 64822e5f83..fbca940c38 100644
--- a/latest/_modules/tensorrt_llm/models/opt/model.html
+++ b/latest/_modules/tensorrt_llm/models/opt/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -811,9 +811,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/phi/model.html b/latest/_modules/tensorrt_llm/models/phi/model.html
index b5d512d010..238c3572f3 100644
--- a/latest/_modules/tensorrt_llm/models/phi/model.html
+++ b/latest/_modules/tensorrt_llm/models/phi/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -855,9 +855,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/phi3/model.html b/latest/_modules/tensorrt_llm/models/phi3/model.html
index d9f3c283a4..336e05569f 100644
--- a/latest/_modules/tensorrt_llm/models/phi3/model.html
+++ b/latest/_modules/tensorrt_llm/models/phi3/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -951,9 +951,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/recurrentgemma/model.html b/latest/_modules/tensorrt_llm/models/recurrentgemma/model.html
index 1e6a02647c..45e21a5efb 100644
--- a/latest/_modules/tensorrt_llm/models/recurrentgemma/model.html
+++ b/latest/_modules/tensorrt_llm/models/recurrentgemma/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1254,9 +1254,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/redrafter/model.html b/latest/_modules/tensorrt_llm/models/redrafter/model.html
index ff2ecd37dd..05e9c831a1 100644
--- a/latest/_modules/tensorrt_llm/models/redrafter/model.html
+++ b/latest/_modules/tensorrt_llm/models/redrafter/model.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -926,9 +926,9 @@
diff --git a/latest/_modules/tensorrt_llm/plugin/plugin.html b/latest/_modules/tensorrt_llm/plugin/plugin.html
index ab4d18e833..2eb7c70990 100644
--- a/latest/_modules/tensorrt_llm/plugin/plugin.html
+++ b/latest/_modules/tensorrt_llm/plugin/plugin.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1482,9 +1482,9 @@
diff --git a/latest/_modules/tensorrt_llm/quantization/mode.html b/latest/_modules/tensorrt_llm/quantization/mode.html
index 1a57fca377..4336466ba4 100644
--- a/latest/_modules/tensorrt_llm/quantization/mode.html
+++ b/latest/_modules/tensorrt_llm/quantization/mode.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1034,9 +1034,9 @@
diff --git a/latest/_modules/tensorrt_llm/quantization/quantize_by_modelopt.html b/latest/_modules/tensorrt_llm/quantization/quantize_by_modelopt.html
index bffc8235ee..930a2bef5e 100644
--- a/latest/_modules/tensorrt_llm/quantization/quantize_by_modelopt.html
+++ b/latest/_modules/tensorrt_llm/quantization/quantize_by_modelopt.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1895,9 +1895,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/enc_dec_model_runner.html b/latest/_modules/tensorrt_llm/runtime/enc_dec_model_runner.html
index 14a8374cb0..83bf997028 100644
--- a/latest/_modules/tensorrt_llm/runtime/enc_dec_model_runner.html
+++ b/latest/_modules/tensorrt_llm/runtime/enc_dec_model_runner.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1165,9 +1165,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/generation.html b/latest/_modules/tensorrt_llm/runtime/generation.html
index f55c97392a..2225cc749f 100644
--- a/latest/_modules/tensorrt_llm/runtime/generation.html
+++ b/latest/_modules/tensorrt_llm/runtime/generation.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -5453,9 +5453,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/kv_cache_manager.html b/latest/_modules/tensorrt_llm/runtime/kv_cache_manager.html
index 84659692e3..8e8a516e53 100644
--- a/latest/_modules/tensorrt_llm/runtime/kv_cache_manager.html
+++ b/latest/_modules/tensorrt_llm/runtime/kv_cache_manager.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1112,9 +1112,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/model_runner.html b/latest/_modules/tensorrt_llm/runtime/model_runner.html
index 2bb6e85224..faaab88636 100644
--- a/latest/_modules/tensorrt_llm/runtime/model_runner.html
+++ b/latest/_modules/tensorrt_llm/runtime/model_runner.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1624,9 +1624,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/model_runner_cpp.html b/latest/_modules/tensorrt_llm/runtime/model_runner_cpp.html
index 5f129f5cef..3d0401e22c 100644
--- a/latest/_modules/tensorrt_llm/runtime/model_runner_cpp.html
+++ b/latest/_modules/tensorrt_llm/runtime/model_runner_cpp.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1828,9 +1828,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/multimodal_model_runner.html b/latest/_modules/tensorrt_llm/runtime/multimodal_model_runner.html
index 3418ba3496..b51a6f1121 100644
--- a/latest/_modules/tensorrt_llm/runtime/multimodal_model_runner.html
+++ b/latest/_modules/tensorrt_llm/runtime/multimodal_model_runner.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1440,9 +1440,6 @@
        elif self.model_type == 'pixtral':
            # Hold on to pixel_values and input_ids.
            dtype = str_dtype_to_torch(self.vision_precision)
-            pixel_values = image["pixel_values"].to(device="cuda", dtype=dtype)
-            input_ids = image["input_ids"].to(device="cuda")
-
            # Shape of pixel values from the processor varies with the raw image.
            # So we create a new tensor with a fixed shape as expected by the vision
            # encoder and create a corresponding attention mask.
@@ -1450,19 +1447,30 @@
            patch_size = self.patch_size
            d_min = torch.finfo(dtype).min
            num_patches = (image_size // patch_size)
-            image = torch.full((1, 3, image_size, image_size),
-                               fill_value=0,
-                               dtype=dtype,
-                               device="cuda")
-            attention_mask = torch.full((1, num_patches, num_patches),
-                                        fill_value=d_min,
-                                        dtype=dtype,
-                                        device="cuda")
-            h, w = pixel_values.shape[-2:]
-            image[..., :h, :w] = pixel_values
-            attention_mask[..., :h // patch_size, :w // patch_size] = 0
+            padded_image = torch.full(
+                (self.args.batch_size, 3, image_size, image_size),
+                fill_value=0,
+                dtype=dtype,
+                device="cuda")
+            padded_attention_mask = torch.full(
+                (self.args.batch_size, num_patches, num_patches),
+                fill_value=d_min,
+                dtype=dtype,
+                device="cuda")
+            h, w, input_ids = [], [], []
+            for img_idx in range(self.args.batch_size):
+                pixel_values = image["pixel_values"][img_idx]
+                img_h, img_w = pixel_values.shape[-2:]
+                padded_image[img_idx, :, :img_h, :img_w] = pixel_values
+                padded_attention_mask[img_idx, :img_h // patch_size, :img_w //
+                                      patch_size] = 0
+                input_ids.append(image["input_ids"][img_idx])
+                h.append(img_h)
+                w.append(img_w)
+
+            image = padded_image
            other_vision_inputs = {
-                "attention_mask": attention_mask,
+                "attention_mask": padded_attention_mask,
            }
        elif self.model_type == 'llava_next':
            input = image
@@ -1681,12 +1689,29 @@
        elif self.model_type == 'pixtral':
            relevant_patch_size = self.patch_size * self.spatial_merge_size
            output_img_size = self.image_size // relevant_patch_size
-            visual_features = visual_features.reshape(
-                output_img_size, output_img_size,
-                -1)[:h // relevant_patch_size, :w //
-                    relevant_patch_size].flatten(0, 1)
+            # Note: max_h * max_w shall serve as the `tokens_per_task` in ptuning prompt table.
+            max_h = max(h) // relevant_patch_size
+            max_w = max(w) // relevant_patch_size
+            visual_embed_dim = visual_features.shape[-1]
+            relevant_visual_features = torch.zeros(self.args.batch_size,
+                                                   max_h * max_w,
+                                                   visual_embed_dim)
+            for img_idx in range(self.args.batch_size):
+                complete_features = visual_features[img_idx]
+                complete_features = complete_features.reshape(
+                    output_img_size, output_img_size, visual_embed_dim)
+                relevant_h = h[img_idx] // relevant_patch_size
+                relevant_w = w[img_idx] // relevant_patch_size
+                flattened_features = complete_features[:relevant_h, :
+                                                       relevant_w, :].flatten(
+                                                           0, 1)
+                relevant_visual_features[img_idx, :relevant_h *
+                                         relevant_w, :] = flattened_features
+            visual_features = relevant_visual_features
            input_ids = self.ptuning_setup_pixtral(input_ids=input_ids)
-            length = input_ids.shape[1]
+            # Note: length is not used for pixtral model downstream. Setting it to a list
+            # of length of input_ids causes errors downstream. So, supplying a placeholder.
+            length = input_ids[0].shape[0]
        elif self.model_type == 'llava_next':
            visual_features = LlavaNextUtils.rearrange_image_features(
@@ -2329,7 +2354,7 @@
    def get_rope_index(self,
-                       input_ids: torch.LongTensor,
+                       input_ids: torch.IntTensor,
                       image_grid_thw: Optional[torch.LongTensor] = None,
                       video_grid_thw: Optional[torch.LongTensor] = None,
                       attention_mask: Optional[torch.Tensor] = None,
@@ -2361,7 +2386,7 @@
Here we calculate the text start position_ids as the max vision position_ids plus 1.

Args:
-    input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+    input_ids (`torch.IntTensor` of shape `(batch_size, sequence_length)`):
        Indices of input sequence tokens in the vocabulary. Padding will be ignored
        by default should you provide it.
    image_grid_thw (`torch.LongTensor` of shape `(num_images, 3)`, *optional*):
@@ -2375,7 +2400,7 @@
        - 0 for tokens that are **masked**.

Returns:
-    position_ids (`torch.LongTensor` of shape `(3, batch_size, sequence_length)`)
+    position_ids (`torch.IntTensor` of shape `(3, batch_size, sequence_length)`)
    mrope_position_deltas (`torch.Tensor` of shape `(batch_size)`)
"""
spatial_merge_size = self.spatial_merge_size
@@ -2594,16 +2619,19 @@
def ptuning_setup_pixtral(self, input_ids):
    # input_ids obtained from processor has token_ids for text as well as image tokens
-    # where each image token is represented the same image_token_index (10 for this model).
+    # where each image token is represented by the same image_token_index.
    image_token_index = self.image_token_index
    vocab_size = self.vocab_size
    # Replace all image tokens with a unique token_id > text_vacab_size.
    # This shall be used to lookup the prompt table.
-    replacer = vocab_size
-    for i in range(len(input_ids[0])):
-        if input_ids[0][i] == image_token_index:
-            input_ids[0][i] = replacer
-            replacer += 1
+    for img_idx in range(self.args.batch_size):
+        # Note: We reset replacer to text_vocab_size for each sample. This is as opposed to
+        # doing `replacer = vocab_size + img_idx * tokens_per_task`. That part of the look-up
+        # manipulation is done by the `task_ids` input to PromptEmbedding forward.
+        replacer = vocab_size
+        for token_idx in range(len(input_ids[img_idx])):
+            if input_ids[img_idx][token_idx] == image_token_index:
+                input_ids[img_idx][token_idx] = replacer
+                replacer += 1
    return input_ids
@@ -2745,7 +2773,24 @@
if isinstance(image_path, str):
    image_path = image_path.split(self.args.path_sep)
images = load_images(image_path)
-
+elif "pixtral" in self.model_type:
+    if image_path is None:
+        image_urls = [
+            "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/assets/merlion.png",
+            "https://www.ilankelman.org/stopsigns/australia.jpg",
+            "https://huggingface.co/microsoft/kosmos-2-patch14-224/resolve/main/snowman.png",
+            "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
+        ]
+        while len(image_urls) < self.args.batch_size:
+            image_urls *= 2
+        image_urls = image_urls[:self.args.batch_size]
+        self.args.image_path = ",".join(image_urls)
+        images = load_images(image_urls)
+    else:
+        if isinstance(image_path, str):
+            image_path = image_path.split(self.args.path_sep)
+        images = load_images(image_path)
+    images = [images] if not isinstance(images, list) else images
elif "nougat" in self.model_type:
    filepath = hf_hub_download(repo_id="hf-internal-testing/fixtures_docvqa",
@@ -2998,9 +3043,15 @@
post_prompt="[/INST]"prompt=pre_prompt+input_text+post_promptdtype=str_dtype_to_torch(self.vision_precision)
- image=self.processor(text=prompt,
- images=[raw_image],
- return_tensors="pt").to(dtype)
+ image={'pixel_values':[],'input_ids':[]}
+ forimg_idxinrange(self.args.batch_size):
+ image_info=self.processor(text=prompt,
+ images=[raw_image[img_idx]],
+ return_tensors="pt").to(dtype)
+ image['pixel_values'].append(image_info['pixel_values'].to(
+ self.device))
+ image['input_ids'].append(image_info['input_ids'][0].to(
+ self.device))elif'internvl'inself.model_type:pre_prompt="<|system|>\n你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。<|end|><|user|>\n<image>\n"
@@ -3204,7 +3255,9 @@
image = image.expand(min(self.args.batch_size, len(input_text)), -1, -1, -1).contiguous()
-if image is not None:
+# Note: For pixtral model, image is a dict with each value being a list of tensors.
+# Moving to device is handled above. So, it's safe to skip this for pixtral.
+if image is not None and 'pixtral' not in self.model_type:
    image = image.to(self.device)
# Generate decoder_input_ids for enc-dec models
# Custom prompts can be added as:
@@ -3354,9 +3407,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/session.html b/latest/_modules/tensorrt_llm/runtime/session.html
index 4c54b2be61..bdd196035a 100644
--- a/latest/_modules/tensorrt_llm/runtime/session.html
+++ b/latest/_modules/tensorrt_llm/runtime/session.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -972,9 +972,9 @@
diff --git a/latest/_modules/tensorrt_llm/sampling_params.html b/latest/_modules/tensorrt_llm/sampling_params.html
index 24f7438145..68539fa6ae 100644
--- a/latest/_modules/tensorrt_llm/sampling_params.html
+++ b/latest/_modules/tensorrt_llm/sampling_params.html
@@ -50,7 +50,7 @@
@@ -60,7 +60,7 @@
-
+
@@ -1106,9 +1106,9 @@
diff --git a/latest/_sources/architecture/core-concepts.md.txt b/latest/_sources/architecture/core-concepts.md.txt
index 4534eccf3f..3f7cfd558d 100644
--- a/latest/_sources/architecture/core-concepts.md.txt
+++ b/latest/_sources/architecture/core-concepts.md.txt
@@ -4,24 +4,24 @@
TensorRT-LLM has a Model Definition API that can be used to define
Large Language Models. This API is built on top of the powerful
-[TensorRT Python API](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/index.html#)
+[TensorRT Python API](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/index.html)
to create graph representations of deep neural networks in TensorRT. To become
familiar with the core concepts of the TensorRT API, refer to the
-[Core Concepts](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/coreConcepts.html)
+[Core Concepts](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/coreConcepts.html)
section of the TensorRT documentation before proceeding further.
In TensorRT-LLM, the [`tensorrt_llm.Builder`](source:tensorrt_llm/builder.py) class
contains a
-[`tensorrt.Builder`](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Core/Builder.html#tensorrt.Builder)
+[`tensorrt.Builder`](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/infer/Core/Builder.html#id1)
object. That instance is used in the `tensorrt_llm.Builder.create_network`
method to create an instance of the
-[`tensorrt.INetworkDefinition`](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Graph/Network.html#tensorrt.INetworkDefinition)
+[`tensorrt.INetworkDefinition`](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/infer/Graph/Network.html#tensorrt.INetworkDefinition)
class. The `INetworkDefinition` object can then be populated using the free
functions defined in the
[`tensorrt_llm.functional`](source:tensorrt_llm/functional.py).
A simple example of such a free function is `tensorrt_llm.activation` that inserts a
-[`tensorrt.IActivationLayer`](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Graph/Layers.html#tensorrt.IActivationLayer)
+[`tensorrt.IActivationLayer`](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/infer/Graph/Layers.html#tensorrt.IActivationLayer)
node in the graph of the model:
```python
@@ -56,23 +56,23 @@ def silu(input: Tensor) -> Tensor:
When the TensorRT-LLM's Model Definition API is utilized, a graph of the network is
assembled. The graph can later be traversed or transformed using the graph
traversal API exposed by the
-[`tensorrt.ILayer`](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Graph/LayerBase.html#tensorrt.ILayer)
+[`tensorrt.ILayer`](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/infer/Graph/LayerBase.html#tensorrt.ILayer)
class. That graph will also be optimized by TensorRT during the compilation of
the engine, as explained in the next section.
# Compilation
Once populated, the instance of the
-[`tensorrt.INetworkDefinition`](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Graph/Network.html#tensorrt.INetworkDefinition),
+[`tensorrt.INetworkDefinition`](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/infer/Graph/Network.html#tensorrt.INetworkDefinition),
can be compiled into an efficient engine by the
-[`tensorrt.Builder`](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Core/Builder.html#tensorrt.Builder)
+[`tensorrt.Builder`](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/infer/Core/Builder.html#id1)
In TensorRT-LLM, it is done through the `build_engine` member function of the
`tensorrt_llm.Builder` class that calls the
-[`build_serialized_network`](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Core/Builder.html#tensorrt.Builder.build_serialized_network)
+[`build_serialized_network`](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/infer/Core/Builder.html#tensorrt.Builder.build_serialized_network)
method of the
-[`tensorrt.Builder`](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Core/Builder.html#tensorrt.Builder)
+[`tensorrt.Builder`](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/infer/Core/Builder.html#id1)
object. That call, if everything works as expected, produces an instance of the
-[`tensorrt.IHostMemory`](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/FoundationalTypes/HostMemory.html#tensorrt.IHostMemory)
+[`tensorrt.IHostMemory`](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/infer/FoundationalTypes/HostMemory.html#tensorrt.IHostMemory)
class. That object is an optimized TensorRT engine that can be stored as a
binary file.
diff --git a/latest/_sources/blogs/H100vsA100.md.txt b/latest/_sources/blogs/H100vsA100.md.txt
index bdffe3fe74..bd87dc718a 100644
--- a/latest/_sources/blogs/H100vsA100.md.txt
+++ b/latest/_sources/blogs/H100vsA100.md.txt
@@ -4,7 +4,7 @@
# H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token
-TensorRT-LLM evaluated on both Hopper and Ampere shows **H100 FP8 is up to 4.6x max throughput and 4.4x faster 1st token latency than A100**. H100 FP8 is able to achieve over 10,000 output tok/s at [peak throughput](https://nvidia.github.io/TensorRT-LLM/performance.html#h100-gpus-fp8) for 64 concurrent requests, while maintaining a 1st token latency of 100ms. For [min-latency](https://nvidia.github.io/TensorRT-LLM/performance.html#id1) applications, TRT-LLM H100 can achieve less than 10ms to 1st token latency.
+TensorRT-LLM evaluated on both Hopper and Ampere shows **H100 FP8 is up to 4.6x max throughput and 4.4x faster 1st token latency than A100**. H100 FP8 is able to achieve over 10,000 output tok/s at peak throughput for 64 concurrent requests, while maintaining a 1st token latency of 100ms. For min-latency applications, TRT-LLM H100 can achieve less than 10ms to 1st token latency.
@@ -28,7 +28,7 @@ TensorRT-LLM evaluated on both Hopper and Ampere shows **H100 FP8 is up to 4.6x
FP8 H100, FP16 A100, SXM 80GB GPUs, TP1, ISL/OSL's provided, TensorRT-LLM v0.5.0., TensorRT 9.1
-The full data behind these charts & tables and including larger models with higher TP values can be found in TensorRT-LLM's [Performance Documentation](https://nvidia.github.io/TensorRT-LLM/performance.html#performance-of-tensorrt-llm)
+The full data behind these charts & tables, including larger models with higher TP values, can be found in TensorRT-LLM's [Performance Documentation](https://nvidia.github.io/TensorRT-LLM/latest/performance/perf-overview.html)
Stay tuned for a highlight on Llama coming soon!
diff --git a/latest/_sources/blogs/H200launch.md.txt b/latest/_sources/blogs/H200launch.md.txt
index 58f5c08781..baa4905613 100644
--- a/latest/_sources/blogs/H200launch.md.txt
+++ b/latest/_sources/blogs/H200launch.md.txt
@@ -21,7 +21,7 @@ TensorRT-LLM evaluation of the [new H200 GPU](https://nvidianews.nvidia.com/news
*(1) Largest batch supported on given TP configuration by power of 2.*
*(2) TP = Tensor Parallelism*
-Additional Performance data is available on the [NVIDIA Data Center Deep Learning Product Performance](https://developer.nvidia.com/deep-learning-performance-training-inference/ai-inference) page, & soon in [TensorRT-LLM's Performance Documentation](https://nvidia.github.io/TensorRT-LLM/performance.html).
+Additional Performance data is available on the [NVIDIA Data Center Deep Learning Product Performance](https://developer.nvidia.com/deep-learning-performance-training-inference/ai-inference) page, & soon in [TensorRT-LLM's Performance Documentation](https://nvidia.github.io/TensorRT-LLM/latest/performance/perf-overview.html).
### H200 vs H100
diff --git a/latest/_sources/blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs.md.txt b/latest/_sources/blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs.md.txt
index b43b8ed004..201c3781a8 100644
--- a/latest/_sources/blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs.md.txt
+++ b/latest/_sources/blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs.md.txt
@@ -2,37 +2,39 @@
by NVIDIA TensorRT-LLM team
## Table of Contents
-- [Background](#background)
-- [Implementation Configuration](#implementation-configuration)
- - [Workload Profile](#workload-profile)
- - [Model Architecture](#model-architecture)
- - [Precision Strategy](#precision-strategy)
- - [Parallelism Strategy](#parallelism-strategy)
- - [Everything in One Diagram](#everything-in-one-diagram)
-- [Key Optimizations](#key-optimizations)
- - [System Level optimizations](#system-level-optimizations)
- - [CUDA Graph & Programmatic Dependent Launch](#cuda-graph--programmatic-dependent-launch)
- - [MTP](#mtp)
- - [Autoregressive MTP Layers](#autoregressive-mtp-layers)
- - [Relax Acceptance Verification](#relax-acceptance-verification)
- - [Multi-streams](#multi-streams)
- - [Sparse Experts as GEMMs](#sparse-experts-as-gemms-only-works-when-moe_backendcutlass)
- - [Re-balanced the sparse experts](#re-balanced-the-sparse-experts)
- - [Mixed ETP](#mixed-etp)
- - [Smart Router](#smart-router)
- - [Kernel Level optimizations](#kernel-level-optimizations)
- - [Attention Kernel](#attention-kernel)
- - [Grouped GEMM](#grouped-gemm)
- - [CUTLASS Backend](#cutlass-backend-default-backend)
- - [TRTLLM Backend](#trtllm-backend)
- - [Communication Kernel](#communication-kernel)
- - [Dense GEMM optimization](#dense-gemm-optimization)
- - [Fuse_A_GEMM](#fuse_a_gemm)
- - [RouterGEMM](#routergemm)
- - [Kernel fusion](#kernel-fusion)
-- [How to reproduce](#how-to-reproduce)
-- [Future Works](#future-works)
-- [Acknowledgment](#acknowledgment)
+- [Pushing Latency Boundaries: Optimizing DeepSeek-R1 Performance on NVIDIA B200 GPUs](#pushing-latency-boundaries-optimizing-deepseek-r1-performance-on-nvidia-b200-gpus)
+ - [Table of Contents](#table-of-contents)
+ - [Background](#background)
+ - [Implementation Configuration](#implementation-configuration)
+ - [Workload Profile](#workload-profile)
+ - [Model Architecture](#model-architecture)
+ - [Precision Strategy](#precision-strategy)
+ - [Parallelism Strategy](#parallelism-strategy)
+ - [Everything in One Diagram](#everything-in-one-diagram)
+ - [Key Optimizations](#key-optimizations)
+ - [System Level optimizations](#system-level-optimizations)
+ - [CUDA Graph \& Programmatic Dependent Launch](#cuda-graph--programmatic-dependent-launch)
+ - [MTP](#mtp)
+ - [Autoregressive MTP Layers](#autoregressive-mtp-layers)
+ - [Relax Acceptance Verification](#relax-acceptance-verification)
+ - [Multi-streams](#multi-streams)
+ - [Sparse Experts as GEMMs (only works when moe\_backend=CUTLASS)](#sparse-experts-as-gemms-only-works-when-moe_backendcutlass)
+ - [Re-balanced the sparse experts](#re-balanced-the-sparse-experts)
+ - [Mixed ETP](#mixed-etp)
+ - [Smart Router](#smart-router)
+ - [Kernel Level optimizations](#kernel-level-optimizations)
+ - [Attention Kernel](#attention-kernel)
+ - [Grouped GEMM](#grouped-gemm)
+ - [CUTLASS Backend (default backend)](#cutlass-backend-default-backend)
+ - [TRTLLM Backend](#trtllm-backend)
+ - [Communication Kernel](#communication-kernel)
+ - [Dense GEMM optimization](#dense-gemm-optimization)
+ - [Fuse\_A\_GEMM](#fuse_a_gemm)
+ - [RouterGEMM](#routergemm)
+ - [Kernel fusion](#kernel-fusion)
+ - [How to reproduce](#how-to-reproduce)
+ - [Future Works](#future-works)
+ - [Acknowledgment](#acknowledgment)
## Background
Recent advancements in Large Language Reasoning Models have demonstrated remarkable success, while creating new deployment challenges. A critical challenge emerges from extended Output Sequence Lengths (OSL) due to complex "thinking and reasoning" processes. Longer OSL demands stricter Token-to-Token Latency (TTL) requirements, often forcing concurrency limitations. The most extreme case, single concurrency (min-latency scenario) , becomes particularly challenging for real-time applications.
diff --git a/latest/_sources/blogs/tech_blog/blog2_DeepSeek_R1_MTP_Implementation_and_Optimization.md.txt b/latest/_sources/blogs/tech_blog/blog2_DeepSeek_R1_MTP_Implementation_and_Optimization.md.txt
index 0014f1c7f2..8fc2b51643 100644
--- a/latest/_sources/blogs/tech_blog/blog2_DeepSeek_R1_MTP_Implementation_and_Optimization.md.txt
+++ b/latest/_sources/blogs/tech_blog/blog2_DeepSeek_R1_MTP_Implementation_and_Optimization.md.txt
@@ -1,26 +1,28 @@
# DeepSeek R1 MTP Implementation and Optimization
by NVIDIA TensorRT-LLM team
## Table of Contents
-- [MTP for inference](#mtp-for-inference)
- - [Background](#background)
- - [MTP Vanilla](#mtp-vanilla)
- - [MTP Eagle](#mtp-eagle)
-- [MTP implementation in TensorRT-LLM](#mtp-implementation-in-tensorrt-llm)
- - [Basic Implementation](#basic-implementation)
- - [MTP Modules](#mtp-modules)
- - [Attention for MTP](#attention-for-mtp)
- - [How to run DeepSeek models with MTP](#how-to-run-deepseek-models-with-mtp)
-- [MTP optimization - Relaxed Acceptance](#mtp-optimization---relaxed-acceptance)
- - [Relaxed Acceptance](#relaxed-acceptance)
- - [How to run the DeepSeek-R1 model with Relaxed Acceptance](#how-to-run-the-deepseek-r1-model-with-relaxed-acceptance)
-- [Evaluation](#evaluation)
- - [Achieving speedup with MTP speculative decoding](#achieving-speedup-with-mtp-speculative-decoding)
- - [Accuracy studies for Relaxed Acceptance](#accuracy-studies-for-relaxed-acceptance)
-- [Future Works](#future-works)
- - [Tree-based speculative decoding support](#tree-based-speculative-decoding-support)
- - [Eagle3 support](#eagle3-support)
- - [Fix known issues](#fix-known-issues)
-- [Acknowledgment](#acknowledgment)
+- [DeepSeek R1 MTP Implementation and Optimization](#deepseek-r1-mtp-implementation-and-optimization)
+ - [Table of Contents](#table-of-contents)
+ - [MTP for inference](#mtp-for-inference)
+ - [Background](#background)
+ - [MTP Vanilla](#mtp-vanilla)
+ - [MTP Eagle](#mtp-eagle)
+ - [MTP implementation in TensorRT-LLM](#mtp-implementation-in-tensorrt-llm)
+ - [Basic Implementation](#basic-implementation)
+ - [MTP Modules](#mtp-modules)
+ - [Attention for MTP](#attention-for-mtp)
+ - [How to run DeepSeek models with MTP](#how-to-run-deepseek-models-with-mtp)
+ - [MTP optimization - Relaxed Acceptance](#mtp-optimization---relaxed-acceptance)
+ - [Relaxed Acceptance](#relaxed-acceptance)
+ - [How to run the DeepSeek-R1 model with Relaxed Acceptance](#how-to-run-the-deepseek-r1-model-with-relaxed-acceptance)
+ - [Evaluation](#evaluation)
+ - [Achieving speedup with MTP speculative decoding](#achieving-speedup-with-mtp-speculative-decoding)
+ - [Accuracy studies for Relaxed Acceptance](#accuracy-studies-for-relaxed-acceptance)
+ - [Future Works](#future-works)
+ - [Tree-based speculative decoding support](#tree-based-speculative-decoding-support)
+ - [Eagle3 support](#eagle3-support)
+ - [Fix known issues](#fix-known-issues)
+ - [Acknowledgment](#acknowledgment)
TensorRT-LLM achieves world-record inference performance for DeepSeek-R1 on NVIDIA Blackwell GPUs, where Multi-Token Prediction (MTP) delivers a significant speedup. In our [previous blog post](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs.md), we discussed the key optimizations that enable the outstanding inference latency of the DeepSeek-R1 model. This article dives deeper into the implementation and optimization of MTP in TensorRT-LLM.
diff --git a/latest/_sources/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md.txt b/latest/_sources/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md.txt
index 15b418f9b5..0de54f69fb 100644
--- a/latest/_sources/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md.txt
+++ b/latest/_sources/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md.txt
@@ -2,6 +2,8 @@
By NVIDIA TensorRT-LLM team
## Table of Contents
+- [Optimizing DeepSeek R1 Throughput on NVIDIA Blackwell GPUs: A Deep Dive for Developers](#optimizing-deepseek-r1-throughput-on-nvidia-blackwell-gpus-a-deep-dive-for-developers)
+ - [Table of Contents](#table-of-contents)
- [Introduction](#introduction)
- [Precision strategy](#precision-strategy)
- [Parallel strategy](#parallel-strategy)
diff --git a/latest/_sources/blogs/tech_blog/blog4_Scaling_Expert_Parallelism_in_TensorRT-LLM.md.txt b/latest/_sources/blogs/tech_blog/blog4_Scaling_Expert_Parallelism_in_TensorRT-LLM.md.txt
new file mode 100644
index 0000000000..a71bc0d037
--- /dev/null
+++ b/latest/_sources/blogs/tech_blog/blog4_Scaling_Expert_Parallelism_in_TensorRT-LLM.md.txt
@@ -0,0 +1,715 @@
+# Scaling Expert Parallelism in TensorRT-LLM (Part 1: Design and Implementation of Large-scale EP)
+
+By NVIDIA TensorRT-LLM Team
+
+## Table of Contents
+- [Scaling Expert Parallelism in TensorRT-LLM (Part 1: Design and Implementation of Large-scale EP)](#scaling-expert-parallelism-in-tensorrt-llmpart-1-design-and-implementation-of-large-scale-ep)
+ - [Table of Contents](#table-of-contents)
+ - [Motivation for large-scale EP](#motivation-for-large-scale-ep)
+ - [Observations over one machine translation dataset](#observations-over-one-machine-translation-dataset)
+ - [Observation over GSM8K dataset](#observation-over-gsm8k-dataset)
+ - [High-level design introduction](#high-level-design-introduction)
+ - [EP communication kernels](#ep-communication-kernels)
+ - [Motivation of EP communication kernels for GB200](#motivation-of-ep-communication-kernels-for-gb200)
+ - [EP communication kernels implementation](#ep-communication-kernels-implementation)
+ - [EP Load Balancer](#ep-load-balancer)
+ - [Python Interface](#python-interface)
+ - [C++ extension](#c-extension)
+ - [Core implementations of host side logics](#core-implementations-of-host-side-logics)
+ - [Core implementations of GPU side logics](#core-implementations-of-gpu-side-logics)
+ - [Online EP Load Balancer](#online-ep-load-balancer)
+ - [Offline EP Load Balancer](#offline-ep-load-balancer)
+ - [E2E evaluation](#e2e-evaluation)
+ - [The effect of EP Load Balancer](#the-effect-of-ep-load-balancer)
+ - [Offline EP Load Balancer](#offline-ep-load-balancer-1)
+ - [Online EP Load Balancer](#online-ep-load-balancer-1)
+ - [Performance study](#performance-study)
+ - [Reproducing steps](#reproducing-steps)
+ - [The effect of EP Load Balancer](#the-effect-of-ep-load-balancer-1)
+ - [Step 1: Run inference and collect statistics](#step-1-run-inference-and-collect-statistics)
+ - [Step 2: Generate the EPLB configuration](#step-2-generate-the-eplb-configuration)
+ - [Step 3: Run inference with the EPLB configuration](#step-3-run-inference-with-the-eplb-configuration)
+ - [Miscellaneous](#miscellaneous)
+ - [Expanded thoughts](#expanded-thoughts)
+ - [Acknowledgement](#acknowledgement)
+
+The development of models like DeepSeek-V3/R1, which use large-scale fine-grained Mixture-of-Experts (MoE) designs, has significantly advanced open-source model quality. Newly released open-source models such as LLaMA4 and Qwen3 also adopt a similar large-scale fine-grained MoE design principle. However, large-scale MoE models introduce new challenges for inference systems, including high memory demands and inherent expert-level workload imbalance.
+
+In the past, we have shared TensorRT-LLM’s optimization experience to [push the latency boundary](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs.md) of the DeepSeek R1 model, [the implementation and optimization of MTP](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog2_DeepSeek_R1_MTP_Implementation_and_Optimization.md) (Multi-Token Prediction), and [the optimizations for DeepSeek R1 throughput-oriented performance](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md).
+
+The DeepSeek team has also shared their valuable experience and practice on how to optimize this kind of large-scale Expert Parallelism (EP) model, including [DeepEP](https://github.com/deepseek-ai/DeepEP) and [EPLB](https://github.com/deepseek-ai/EPLB), and they have described their concrete design considerations in [this](https://arxiv.org/abs/2412.19437) tech report. On top of that great work, there have also been nice community efforts to implement large-scale EP in other inference engines, such as [this](https://lmsys.org/blog/2025-05-05-large-scale-ep/) effort from the SGLang team.
+
+In this tech blog, we will introduce the details of the design and implementation to support E2E large-scale EP in TensorRT-LLM. This blog post mainly covers the following:
+
+* How to leverage NVIDIA GB200 Multi-Node NVLink (MNNVL) HW features to implement high-performance communication kernels.
+* How to design and implement an online expert workload balancer to dynamically balance the expert load distribution and adapt to the changes of online traffic patterns. We present:
+ * The empirical data analysis demonstrating the need to do so.
+ * The implementation of the online traffic data statistic module.
+ * The design and implementation of the replication/placement strategy.
+ * The MoE weight load/re-distributer to balance the online workload across multiple GPUs.
+ * The changes needed to the MoE router and computation module to adapt to the expert load balancer needs.
+ * Some preliminary data demonstrating the effectiveness of the current implementation in TensorRT-LLM.
+
+In future tech blogs, we will also cover the following topics:
+* The introduction of performance tuning and optimization for TensorRT-LLM large-scale EP GB200 implementation.
+* How to implement efficient large-scale EP support for B200/Hopper and other NVIDIA GPUs without MNNVL.
+* The best practices to leverage large-scale EP and get performance gains.
+* How to combine large-scale EP with other system optimization techniques.
+
+
+Although this tech blog focuses on TensorRT-LLM, we believe the core ideas and implementation can also be applied to other inference engines to improve inference performance on NVIDIA GPUs. Also, with the help of the community, we would like to figure out how to better modularize the current TensorRT-LLM large-scale EP implementation and make it more easily reusable by the community.
+
+Finally, this tech blog contains implementation details which are targeted towards the GB200 system, such as the communication components leveraging the GB200 MNNVL inter-GPU connection, and the MoE weight load/re-distributer module leveraging the high-bandwidth C2C connection between the Grace CPU and the Blackwell GPU. Nevertheless, the overall design principle and software architecture still apply to non-GB200 NVIDIA GPU systems. To facilitate the extension to other non-GB200 systems, we have deliberately paid attention to generalizing the design and implementation. These changes should be easily composable with other existing components.
+
+## Motivation for large-scale EP
+
+
+The main motivation for introducing large-scale EP (here meaning EP > 8) comes from the following system considerations:
+
+* We expect to reduce the execution latency thanks to the increased aggregated memory bandwidth to load the expert weights.
+* We expect to increase the effective batch size to saturate the GPU computing power.
+
+Note that **when the E2E execution time is dominated by the MoE GroupGEMM computation, introducing large-scale EP is expected to bring clear performance benefits. But if the E2E execution time is not dominated by the MoE GroupGEMM computation, then large-scale EP may bring only limited performance benefit.**
+
+
+There is no free lunch in system design, however. When the EP size increases beyond 8 (sometimes even below 8), the sparse execution nature of MoE models can inherently trigger EP-level workload imbalance.
+
+Here are some empirical observations based on several datasets (*all the analyses below were done with the **DeepSeek R1 model** on **32 GB200 GPUs***).
+
+### Observations over one machine translation dataset
+
+First, let’s get an overview of the imbalance issues across layers:
+
+Figure 1: The routed token count from rank 0 to all the ranks (including rank 0), for decode iteration 1950, and all the MoE layers
+
+In Figure 1, it can be seen clearly that for the MoE in layer 36, many more tokens are sent from **rank 0** to **rank 13**.
+
+If we zoom in on the MoE in layer 36 and record its activated expert rank distribution, there is clearly a rank that is more heavily activated:
+
+Figure 2: The tokens received for each expert rank for layer 36
+
+If we flatten the data to see the routed tokens for each expert, we can see that a few experts are more active than others:
+
+Figure 3: The tokens received for each expert for layer 36
+
+It is also interesting to see that this kind of imbalance is very stable across multiple iterations, as shown in the following figure:
+
+Figure 4: The accumulated token counts received for each expert for layer 36, within 50 decode steps, and the local batch size=256.
+
+Clearly, the hot experts in Figure 4 are the same as in Figure 3, which only shows data for a single decode iteration.
+We have also done the same duration-based analysis for local batch size = 1, which corresponds to a single request, and observed a similar pattern:
+
+Figure 5: The accumulated token counts received for each expert for layer 36, within 400 decode iterations, and the local batch size = 1.
+
+To conclude the findings from this study of the machine translation dataset:
+
+* There are hot spots in some layers where the workload of some EP ranks can be much higher than that of others.
+* This may be caused by the hottest expert, or several hot experts, being located on the same rank.
+* The routed token distributions can stay the same for tens to hundreds of iteration steps, or even more.
+* A single request also keeps the same hot experts across steps.
+
+Another natural question is whether the above observations change significantly on other datasets, so we have done a similar analysis with the GSM8K dataset.
+
+### Observation over GSM8K dataset
+
+Figure 6: The routed token count from rank 0 to all the ranks, for iteration 1950, and all the MoE layers
+
+In Figure 6, compared with Figure 1, it can be seen that for GSM8K the hot layer becomes layer 57 instead of layer 36. Then what about the concrete status of layer 36 on the GSM8K dataset?
+
+Figure 7: Routed token counts from EP rank 0 to other EP ranks, still taking iteration 1950, MoE layer 36 as the example
+
+Clearly, from Figure 7 it can be observed that the workload imbalance is different from what was observed on the machine translation dataset (Figure 2).
+Based on Figure 8, the workload imbalance is also relatively stable across multiple iterations on the GSM8K dataset, just as on the machine translation dataset.
+
+Figure 8: The accumulated token counts sent from EP Rank 0 to all the ranks, for MoE layer 57 within 50 decode steps, local batch size=256
+
+If we flatten the EP rank level data to expert-level data, we get the following figure.
+
+Figure 9: The accumulated token counts received for each expert for layer 57, within 50 decode steps, and the local batch size=256.
+
+A similar imbalance pattern also exists for a single request.
+
+Figure 10: The accumulated token counts received for each expert for layer 57, within 400 decode steps, for a single request
+
+If we use another request, we can still observe the expert imbalance issue, while the hot experts can be different, with some in common (in this example, expert 10).
+
+Figure 11: The accumulated token counts received for each expert for layer 57, within 400 decode steps, for a single request
+
+Combining the data analysis of the two datasets, we have the following findings:
+
+* EP-level workload imbalance is common for large-scale EP inference across multiple datasets. The severity of the imbalance can differ per layer, and the imbalance itself is dataset sensitive.
+* The EP rank-level imbalance can be caused by a single hottest expert or by multiple hot experts residing on the same EP rank.
+* The EP rank imbalance distribution is relatively stable across tens to hundreds of iterations.
+* Despite this time-dimension stability, different requests can clearly have different EP imbalance distributions.
+
+These findings lead to the following design considerations for TensorRT-LLM’s large-scale EP implementation:
+
+* The EP imbalance issue needs to be considered by design to ensure good E2E performance.
+* An Online EP Load Balancer (rather than only an Offline EP Load Balancer implementation) based on real-time online request traffic is essential to make EP balancing robust.
+* The time-dimension stability of the EP rank imbalance distribution can be leveraged to redistribute the MoE weights to different EP ranks efficiently.
+
+In the next section we will illustrate the high-level design.
+
+## High-level design introduction
+
+Based on the detailed analysis and study in the section [Motivation for large-scale EP](#motivation-for-large-scale-ep), it can clearly be observed that expert imbalance is a common pattern for large-scale EP. This EP imbalance can clearly impede the overall system performance in the following ways:
+
+* The hot EP rank will consume more memory (for activations) which can limit the effective max batch size scheduled during the inference process.
+* More data will be sent to/received from the hot EP rank.
+
+Those issues can clearly result in a system-level congestion effect in which the hot EP rank delays the overall E2E execution.
+
+To make sure large-scale EP can run well, careful considerations are needed to minimize the EP imbalance issue. The overall design is as follows:
+
+Figure 12: The high-level design of TensorRT-LLM large-scale EP
+
+In this design, there are both CPU and GPU side logics:
+
+* CPU side
+ * Implement the Replication \& Placement algorithms (**Replication \& Placement Compute** component) to achieve a more balanced EP strategy. Those are rather classical algorithms for which CPU computation is more suitable. Furthermore, by offloading this computation to the CPU, the interference with the GPU can be reduced. In the future, machine-learning based algorithms may also be explored and additional design considerations may be needed. The **Replication \& Placement Compute** component will generate the **“Placement Info”** which will then be consumed by both the GPU **Routing** logic and the CPU **Update Weights \& Placement** component. The **Replication \& Placement Compute** component will consume the **Statistics Data** generated by the **Statistics** component which runs on the GPU.
+ * Orchestrate the process (**Update Weights \& Placement** component) to update and reload the MoE weights from CPU host memory to GPU device memory. This component will also consume the **Placement Info** generated by the **Replication \& Placement Compute** component. Our scalable design allows us to reload the MoE weights from remote GPU memory via MNNVL or NIC.
+
+* GPU side
+ * This is the main execution workflow of inference. The following new GPU components are introduced with our design:
+ * EP communication kernels. In Figure 12, those are the **Dispatch** and **Combine** components.
+ * Online traffic data statistics collector (the **Statistics** component). This component collects the **Statistics Data** which is to be consumed by the **Replication \& Placement Compute** component.
+ * The MoE router logic (the **Routing** component). It sends tokens to the activated experts. It needs to be adjusted to support the dynamic placement of MoE weights. It also consumes the **Placement Info** generated by the **Replication \& Placement Compute** component.
+ * The MoE computation logic (the **MoE** component) also needs to be adjusted correspondingly.
+
+* Careful synchronization between CPU and GPU components is needed to ensure the validity of the entire execution process; in particular, to avoid hangs as well as invalid or sub-optimal executions.
+
+For the **Update Weights \& Placement** component, we identified two design choices:
+
+* Bulk approach
+ * In this approach, when the MoE weight redistribution logic starts, the inference taking place on the current serving instance has to be paused until the MoE weight redistribution process finishes. We estimate that this can lead to approximately **0.5 \~ 1 second** of online serving stalls, causing request timeouts in the worst case. These timeouts or stalls can be mitigated at the system level by routing the requests to other serving instances or by simply replaying the requests.
+* Layer-wise approach
+ * In this approach, the MoE weight redistribution is done layer by layer such that at each decode iteration only certain layers (it can be configured) will be impacted by a redistribution of their MoE weights. With this design, it will take several iterations to re-balance the MoE weights of all the layers. We expect this approach to have almost no impact on the user experience.
+
+Figure 13: One example of the layer-wise MoE weight re-distribution
+
+In our current system, we choose to implement **the layer-wise approach** to minimize the impact on the online user experience. The bulk approach should be much easier to implement and we will not discuss it in this tech blog.
+To implement the layer-wise approach properly, we need to carefully evaluate the capability of different underlying HWs to decide on the concrete implementation.
+Let’s use GB200 as an example. In Figure 14, we illustrate the communication bandwidth of different HW elements in a GB200 node.
+
+Figure 14: High-level topology of the GB200 system
+
+Using the DeepSeek R1 model as an example, with FP4 precision each MoE expert occupies 24MiB of memory space. There are 256 experts per layer, and 58 MoE layers in total, plus 1 MTP layer. So the maximum amount of MoE weights which need to be redistributed, to achieve EP balance, is 348GiB.
+One GB200 node has 480GB of LPDDR5X memory for each Grace CPU, i.e. 960GB of host memory across a NUMA domain. One GB200 node can therefore host the entire MoE weights of a model like DeepSeek R1 in its CPU host memory. Based on this, the MoE weight redistribution can be done by moving the corresponding MoE weights from CPU host memory to GPU device memory.
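+
+As a quick sanity check on that figure (counting the 58 MoE layers): $24\,\mathrm{MiB} \times 256\ \text{experts} \times 58\ \text{layers} = 348\,\mathrm{GiB}$.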
+
+Let's assume that we target **50ms** inter-token-latency (ITL) as our main latency constraint. Using a back-of-the-envelope calculation, we can compute the amount of expert weights that can be moved from the MoE weight pool (which can be kept in Grace CPU memory or in GPU memory on another node) to the Blackwell GPU (doing the real MoE inference) for each decode iteration:
+
+Figure 15: The theoretical expert count that can be updated each iteration under the 50ms ITL constraint, using different HW as the pool to store the full MoE weights
+
+Based on this analysis, if we rely on the Grace CPU memory on each node to store the MoE weight pool, the weights of up to 300 experts can be redistributed to each GPU on the same GB200 node per decode iteration.
+Assuming our goal is to finish the MoE weight re-balancing for the full model within 5 decode iterations, here are some more concrete use-case studies (the arithmetic behind these numbers is sketched after the list):
+
+* Use-case 1 (with balanced expert placement and no expert replication)
+ * 64 GPUs with 4 Experts per GPU
+ * 58 layers, 232 Experts per GPU
+ * Need 47 Expert Update / Iter, all the methods can satisfy the latency goal.
+* Use-case 2 (with both balanced expert placement and replication)
+ * 64 GPUs or 72 GPUs with 5 Experts per GPU
+ * 58 layers, 290 Experts per GPU
+ * Need 58 Expert Update / Iter, all the methods can satisfy the latency goal.
+* Use-case 3 (with both balanced expert placement and replication)
+ * 36 GPUs with 8 Experts per GPU
+ * 58 layers, 464 Experts per GPU
+ * Need 93 Expert Update / Iter, all the methods can satisfy the latency goal.
+
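+A minimal sketch of the arithmetic behind these use cases (the helper name and the 5-iteration budget are just the assumptions stated above, not TensorRT-LLM code):
+
+```python
+import math
+
+def expert_updates_per_iter(experts_per_gpu_per_layer: int,
+                            num_moe_layers: int = 58,
+                            rebalance_iters: int = 5) -> int:
+    """Experts whose weights must be refreshed on each GPU per decode iteration."""
+    total_expert_slots = experts_per_gpu_per_layer * num_moe_layers
+    return math.ceil(total_expert_slots / rebalance_iters)
+
+for slots in (4, 5, 8):  # use-cases 1, 2 and 3 above
+    print(slots, "slots/GPU ->", expert_updates_per_iter(slots), "expert updates / iteration")
+# 4 slots/GPU -> 47, 5 slots/GPU -> 58, 8 slots/GPU -> 93
+```
+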
+In summary, based on the theoretical analysis, using Grace CPU memory as the pool to hold the full size MoE weights should allow us to achieve the EP (Expert-Parallelism) re-balancing within 5 decode iterations. If we relax the requirements to 10 or more iterations, there can be even more system implementation flexibility.
+
+Next we will introduce the implementation details of our large-scale EP system.
+
+## EP communication kernels
+
+We have evaluated multiple ways of implementing the EP communication kernels needed by large-scale EP, including DeepEP, other solutions and the development of an approach from scratch.
+
+The current technical decision is:
+
+* For GB200, we implemented a new set of [custom EP communication kernels](https://github.com/NVIDIA/TensorRT-LLM/pull/3504).
+* For non-GB200 systems (such as B200 or Hopper), we chose to integrate DeepEP directly, with some potential enhancement.
+
+ The considerations are:
+
+* DeepEP is a great piece of work done by the DeepSeek team. When we started the TensorRT-LLM large-scale EP efforts, our first focus was on GB200. We chose to implement our own custom EP communication kernels as it was easier to introduce optimizations requiring the GB200 MNNVL capability. Also, based on our current evaluation, DeepEP does not provide CUDA graph compatibility for all the scenarios. We believe that CUDA graph is needed for the scenario we are interested in.
+* When we started the efforts to enable large-scale EP on Hopper, we concluded that DeepEP could be adapted and meet our needs on this platform. We plan to extend DeepEP to work for B200 in the future.
+
+We are also actively evaluating the possibility of consolidating GB200 and non-GB200 EP communication kernels into a single solution to make the system simpler, and we will keep the community posted on the status.
+Now let’s talk a little bit more about the optimizations introduced into the custom EP communication kernel implementations.
+
+### Motivation of EP communication kernels for GB200
+
+In the Decoding Phase with Prefill-Decoding (PD) separation, we observed that the batch size may not be very large, such that latency is a significant concern. In this context, compatibility with CUDA Graph is a strong requirement.
+[NCCL](https://github.com/NVIDIA/nccl) is a great GPU communication library which provides highly efficient communication kernels and primitives.
+For now, its Send and Recv operations require the data size to be explicitly specified when invoking with `ncclSend`/`ncclRecv`.
+However, in large expert parallel (large-EP) scenarios, the data size to be transferred is determined dynamically based on the model's output at each iteration.
+With the current NCCL's communication interface, an explicit synchronization is required to send the communication size back to the CPU and launch NCCL calls from the CPU with the corresponding data size. This would break CUDA Graph compatibility.
+This limitation has forced us to develop high-performance communication kernels that are compatible with CUDA Graph and that can accept communication sizes directly from GPU memory.
+We also wanted those kernels, on GB200, to take advantage of MNNVL's memory bandwidth.
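+
+The sketch below (plain PyTorch, not TensorRT-LLM code) illustrates the problematic pattern: when the number of routed tokens only lives on the GPU, issuing the send from the CPU requires a device-to-host sync, which cannot be captured in a CUDA graph.
+
+```python
+import torch
+import torch.distributed as dist  # assumed to be initialized elsewhere with the NCCL backend
+
+def dispatch_with_host_sync(routed_tokens: torch.Tensor, num_routed: torch.Tensor, dst: int):
+    # num_routed is a GPU tensor produced by the router; .item() forces a
+    # device-to-host synchronization, which breaks CUDA graph capture.
+    n = int(num_routed.item())
+    dist.send(routed_tokens[:n].contiguous(), dst=dst)
+
+# The custom EP kernels instead read the size directly from GPU memory inside the
+# kernel, so no host round-trip is needed and the decode step stays graph-capturable.
+```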
+
+### EP communication kernels implementation
+Our kernels adopt a communication approach similar to NCCL’s LL128 primitive. As this approach strikes a good balance between latency and bandwidth, it is well-suited for LLM inference.
+Our custom kernels can read the communication size directly from GPU memory and are compatible with CUDA Graph even when the data size varies across runs.
+
+In our implementation, we use the CUDA Driver API to establish a peer-to-peer (P2P) buffer via MNNVL as a workspace.
+Each GPU can access the workspace of other GPUs. The workspace is divided into multiple channels, each assigned to a remote GPU as a write buffer.
+Those write buffers are used in a FIFO manner, with flags used to synchronize FIFO status and avoid data corruption.
+More details can be found in [PR 3504](https://github.com/NVIDIA/TensorRT-LLM/pull/3504).
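+
+The following is a simplified, host-side Python sketch of the channel/FIFO bookkeeping described above (names and structure are illustrative only, not the actual CUDA kernel implementation): each remote GPU owns one channel inside the local workspace, slots are reused in FIFO order, and a per-slot flag tells the reader when a payload is complete.
+
+```python
+from collections import deque
+from dataclasses import dataclass, field
+
+@dataclass
+class Channel:
+    """Write buffer that one remote GPU uses inside this GPU's workspace."""
+    num_slots: int
+    ready: list = field(default_factory=list)    # per-slot "payload complete" flags
+    in_flight: deque = field(default_factory=deque)
+    next_slot: int = 0
+
+    def __post_init__(self):
+        self.ready = [False] * self.num_slots
+
+    def acquire(self) -> int:
+        # Writer side: take the next slot in FIFO order; a real kernel would spin
+        # until the reader has released a slot instead of asserting.
+        assert len(self.in_flight) < self.num_slots, "FIFO full"
+        slot, self.next_slot = self.next_slot, (self.next_slot + 1) % self.num_slots
+        self.in_flight.append(slot)
+        return slot
+
+    def publish(self, slot: int):
+        self.ready[slot] = True                  # flag set only after the data is written
+
+    def consume(self) -> int:
+        slot = self.in_flight.popleft()
+        assert self.ready[slot]                  # reader polls the flag before touching data
+        self.ready[slot] = False                 # hand the slot back to the writer
+        return slot
+```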
+
+## EP Load Balancer
+
+TensorRT-LLM implements a set of functionalities to achieve EP Load Balancing. There are several key components:
+
+### Python Interface
+
+The Python interface layer provides a user-friendly PyTorch/Python native interface to access the MoE Load Balancing implementation, such as the Python wrappers for the GPU/CPU synchronization logic and the online data statistics collection, as well as the other logic described in the following subsections.
+
+### C++ extension
+
+The C++ extension acts as the bridge between the PyTorch/Python interface and the C++/CUDA core implementations.
+
+### Core implementations of the host logic
+
+The host-side core logic implements the following key parts:
+
+* Load balancing algorithms
+ * Replication algorithm
+ * Placement algorithm
+* Orchestration logic of MoE weight updates
+* MoE weight update logic
+
+### Core implementations of the GPU logic
+
+The GPU core logic contains the following components:
+
+* Online traffic statistics collection
+ * To reduce the CPU-GPU back-and-forth synchronization cost, we choose to implement the online traffic statistic logic on the GPU side.
+* Expert routing logic
+ * The MoE routing logic needs to be enhanced to adapt to the dynamic expert placement introduced by EP balancing.
+
+GPU/CPU synchronization components are also implemented. More details can be found in [PR 4384](https://github.com/NVIDIA/TensorRT-LLM/pull/4384) and [PR 4495](https://github.com/NVIDIA/TensorRT-LLM/pull/4495).
+
+Based on these core utilities, there are two versions of EP Load Balancer in TensorRT-LLM: Offline EP Load Balancer and Online EP Load Balancer.
+
+### Online EP Load Balancer
+
+For production deployment needs, the Online EP Load Balancer is recommended since it can dynamically adapt to changes in the online traffic pattern, thus providing stronger performance guarantees.
+
+However, the Online EP Load Balancer faces several challenges.
+
+First, load balancing introduces dynamic Expert placement. A single Expert’s location may shift based on current workload. For example, if Expert 0 and Expert 1, originally assigned to Rank 0, both become hot experts, the load balancing policy might redistribute them to different ranks alongside cold experts, which necessitates timely updates to the weight data.
+
+We aim for the Online Load Balancer to react swiftly to changes in request patterns and adjust Expert assignments to avoid load imbalance issues. Importantly, we do not want the balancing process to interfere with the online inference execution process, nor do we want to employ a "Stop-The-World" (Bulk) strategy for updating weights.
+
+In large MoE models (such as DeepSeek R1), batch sizes during the decoding phase are often small, making CUDA Graph an effective acceleration method, especially when high TPS per user is required. This benefit is even more pronounced on platforms like GB200. For this reason, we want the entire load balancing mechanism to be compatible with CUDA Graph.
+
+To avoid invalidating pre-captured CUDA Graphs, we perform in-place weight updates by writing new Expert weights into the same memory locations, rather than swapping out tensor pointers. This ensures the weights tensor address remains unchanged in the Model Engine.
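+
+A minimal sketch (illustrative only, not the TensorRT-LLM implementation; the tensor shape and names are made up) of the difference between the two update styles:
+
+```python
+import torch
+
+# Weights owned by one Expert Slot; a CUDA graph may have captured this tensor's address.
+expert_slot_weight = torch.empty(2048, 1024, dtype=torch.bfloat16, device="cuda")
+
+def swap_pointer(new_weight: torch.Tensor):
+    # NOT graph-safe: rebinding to a new tensor changes the storage address that
+    # any pre-captured CUDA graph still points at.
+    global expert_slot_weight
+    expert_slot_weight = new_weight
+
+def update_in_place(new_weight: torch.Tensor):
+    # Graph-safe: copy the new expert's weights into the existing storage, so the
+    # captured address stays valid while the contents change.
+    expert_slot_weight.copy_(new_weight, non_blocking=True)
+```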
+
+In this design, each Expert Slot serves as a container for holding an Expert’s weights, decoupled from any specific Expert. The number of Expert Slots must be greater than or equal to the total number of Experts so that each Expert always has at least one available Slot. Hot Experts may occupy multiple Slots. Each Slot is identified by a SlotId.
+
+Since the MoE model's routing logic outputs ExpertIds (not SlotIds), we maintain a routing table from ExpertId to SlotId, which is periodically updated by the load balancing policy. The Load Balancer Routing module uses the current routing table (Expert replication information and slots) to map each token to a suitable Expert Slot.
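+
+A small sketch of how such an ExpertId-to-SlotId table with replication could be applied to the router output (the table contents, sizes, and the round-robin choice over replicas are illustrative assumptions, not the actual TensorRT-LLM routing kernel):
+
+```python
+import torch
+
+NUM_EXPERTS = 256
+
+# expert_to_slots[e] lists the SlotIds currently holding a replica of expert e's weights.
+expert_to_slots = [[e] for e in range(NUM_EXPERTS)]
+expert_to_slots[10].append(256)  # e.g. hot expert 10 replicated into a spare slot 256
+
+def experts_to_slots(token_expert_ids: torch.Tensor) -> torch.Tensor:
+    """Map per-token ExpertIds (from the MoE router) to SlotIds."""
+    slot_ids = torch.empty_like(token_expert_ids)
+    for tok, expert in enumerate(token_expert_ids.tolist()):
+        replicas = expert_to_slots[expert]
+        slot_ids[tok] = replicas[tok % len(replicas)]  # spread tokens over the replicas
+    return slot_ids
+
+print(experts_to_slots(torch.tensor([10, 10, 10, 42])))  # tensor([ 10, 256,  10,  42])
+```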
+
+To make weight updates non-blocking and avoid "Stop-The-World", we use a layer-wise update approach. After a layer’s forward pass completes and before its next forward pass starts, we perform the weight balancing for that layer; the next forward pass for the same layer should wait until the last update is done if it happens at this iteration.
+
+As the forward execution is typically driven by a single Python thread invoking a sequence of PyTorch operations, we offload the weight update routine to a background C++ thread. The Python side only initializes the Expert Slots and registers Expert Weights in shared host memory.
+
+During forward execution, we insert lightweight lock/unlock kernels before and after MoE computations, as well as kernels for collecting statistics and assigning SlotIds to ExpertIds. These kernels must be short and overlap-friendly to minimize performance impact. As long as the CPU weights update thread can finish its work on time, the lock/unlock will be very short. All, except for the routing kernel, are lightweight and can easily overlap with forward kernels in different CUDA streams; the routing kernel is the primary optimization focus.
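+
+A conceptual sketch of where these per-layer hooks sit in the MoE forward pass (plain Python with placeholder stand-ins for the real GPU kernels and the background update thread):
+
+```python
+def collect_statistics(layer_idx, expert_ids):
+    """Placeholder for the GPU statistics kernel feeding Replication & Placement Compute."""
+
+def route_to_slots(layer_idx, expert_ids):
+    """Placeholder for the ExpertId -> SlotId routing kernel."""
+    return expert_ids
+
+class MoELayerWithBalancerHooks:
+    def __init__(self, layer_idx, moe):
+        self.layer_idx, self.moe = layer_idx, moe
+        self.locked = False  # real implementation: lightweight GPU lock/unlock kernels
+
+    def forward(self, hidden_states, router_logits):
+        self.locked = True                                 # block the background weight updater
+        expert_ids = router_logits.topk(k=8, dim=-1).indices
+        collect_statistics(self.layer_idx, expert_ids)
+        slot_ids = route_to_slots(self.layer_idx, expert_ids)
+        out = self.moe(hidden_states, slot_ids)            # dispatch / GroupGEMM / combine
+        self.locked = False                                # updater may rebalance this layer now
+        return out
+```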
+
+On GB200, we utilize MNNVL for inter-GPU communication during Expert dispatch and combine. Expert weights reside in host memory and are brought into GPU memory via C2C to support asynchronous updates. A multi-threaded Host Copy Engine manages this process, auto-detecting NUMA topology and choosing optimal CPU cores, enabling full asynchrony with model forward passes.
+
+On servers without C2C but with PCIe, if cross-node communication is required, network traffic and weight updates may compete for PCIe bandwidth, requiring additional tuning and design consideration. We have not implemented the copy engine for PCIe servers yet; it is on our list of future tasks.
+
+### Offline EP Load Balancer
+
+The Online EP Load Balancer is more suitable for production deployments since it reacts in a timely fashion to online traffic changes. However, the Offline EP Load Balancer provides a lightweight way to do performance studies, debugging, and validation. You can refer to [this PR](https://github.com/NVIDIA/TensorRT-LLM/pull/4695) to learn more about the implementation of the Offline EP Load Balancer. There is also a tool provided to collect statistics about the expert activation distribution, which can be used as the input to deduce the EP balancing placement strategy. You can refer to [this](https://github.com/NVIDIA/TensorRT-LLM/tree/feat/large-ep/examples/ep_load_balancer#offline-ep-load-balancer) doc to learn more details, as well as how to run the Offline EP Load Balancer end to end.
+
+## E2E evaluation
+
+### The effect of EP Load Balancer
+
+#### Offline EP Load Balancer
+As shown by Figure 1, on the machine translation dataset, MoE layer 36 suffers from extreme expert load imbalance issues, so we use that layer to illustrate the effect of EP Load Balancer. We still run DeepSeek-R1 with 32-way expert parallelism on 32 GB200 GPUs.
+
+Figure 16: The routed token count by receiving ranks (x-axis) and iterations (y-axis) at layer 36 (No EPLB)
+
+Figure 17: The routed token count by experts (x-axis) and iterations (y-axis) at layer 36 (No EPLB)
+
+Figure 16 displays the routed token count by receiving ranks over 50 iterations, which could represent the workload for each rank. Rank 13 receives significantly more tokens than all other ranks, and such an imbalanced workload distribution is almost constant over iterations. Figure 17 breaks down the workload to experts. Clearly, two hot experts on rank 13 cause the excessive pressure on this rank.
+
+With the above statistics, we can perform offline EPLB. One potential strategy is to maintain the 32-way expert parallelism while increasing expert slots from 8 to 9 per rank. This results in 32 redundant experts and 288 expert slots in total. Figures 18 and 19 show the routed token count after EPLB. Clearly, the per-rank token distribution is much more balanced, and there are no hot experts anymore.
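+
+To make the idea concrete, the following is one simple greedy replication/placement heuristic that turns per-expert token statistics into a slot layout; it is only an illustration of the approach, not necessarily the algorithm used by TensorRT-LLM's EPLB. With 256 experts, 32 ranks and 9 slots per rank there are 288 slots, i.e. 32 spare slots for replicating the hottest experts.
+
+```python
+import heapq
+
+def greedy_eplb(expert_loads, num_ranks=32, slots_per_rank=9):
+    """expert_loads: routed token count per expert (from the collected statistics)."""
+    num_experts = len(expert_loads)
+    num_slots = num_ranks * slots_per_rank
+
+    # 1) Replication: repeatedly give a spare slot to the expert with the highest
+    #    load per replica, so hot experts end up with more replicas.
+    replicas = [1] * num_experts
+    heap = [(-load, e) for e, load in enumerate(expert_loads)]
+    heapq.heapify(heap)
+    for _ in range(num_slots - num_experts):
+        _, e = heapq.heappop(heap)
+        replicas[e] += 1
+        heapq.heappush(heap, (-expert_loads[e] / replicas[e], e))
+
+    # 2) Placement: assign slots (heaviest first) to the currently least-loaded rank
+    #    that still has free capacity.
+    slots = sorted(((expert_loads[e] / replicas[e], e)
+                    for e in range(num_experts) for _ in range(replicas[e])), reverse=True)
+    rank_load = [0.0] * num_ranks
+    placement = [[] for _ in range(num_ranks)]
+    for load, e in slots:
+        candidates = [r for r in range(num_ranks) if len(placement[r]) < slots_per_rank]
+        r = min(candidates, key=lambda i: rank_load[i])
+        placement[r].append(e)
+        rank_load[r] += load
+    return replicas, placement
+```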
+
+Figure 18: The routed token count by receiving ranks (x-axis) and iterations (y-axis) at layer 36 (EPLB with 9 per-rank slots and EP 32)
+
+Figure 19: The routed token count by experts (x-axis) and iterations (y-axis) at layer 36 (EPLB with 9 per-rank slots and EP 32)
+
+Another EPLB strategy is to maintain 8 expert slots per rank while increasing expert parallelism to 36 ways. This strategy also results in 32 redundant experts and 288 expert slots in total. As displayed by Figures 20 and 21, the workloads also become balanced across ranks or expert slots.
+
+Figure 20: The routed token count by receiving ranks (x-axis) and iterations (y-axis) at layer 36 (EPLB with 8 per-rank slots and EP 36)
+
+Figure 21: The routed token count by experts (x-axis) and iterations (y-axis) at layer 36 (EPLB with 8 per-rank slots and EP 36)
+
+For each layer and iteration, the load imbalance can be measured using simple metrics such as the standard deviation or the imbalance ratio. Given the routed token counts for all ranks (or experts), the imbalance ratio is defined as $(max - mean) / mean$, which represents the excessive workload received by the hottest rank (or expert). A perfectly balanced load would have an imbalance ratio of 0.
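+
+Both metrics can be computed directly from a vector of routed token counts (per rank or per expert slot), matching the definition above:
+
+```python
+import torch
+
+def balance_metrics(routed_token_counts: torch.Tensor):
+    counts = routed_token_counts.float()
+    mean = counts.mean()
+    std_dev = ((counts - mean) ** 2).mean().sqrt()   # population standard deviation
+    imbalance_ratio = (counts.max() - mean) / mean   # (max - mean) / mean, as defined above
+    return std_dev.item(), imbalance_ratio.item()
+
+# A perfectly balanced load gives an imbalance ratio of 0.
+print(balance_metrics(torch.tensor([1024, 1024, 1024, 1024])))  # (0.0, 0.0)
+```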
+
+Table 1 reports the standard deviation and imbalance ratio for the aforementioned cases. Each number is averaged from the per-layer per-iteration metrics. Without EPLB, the load imbalance is significant -- on average, the hottest rank receives 1.56 times more routed tokens than the mean. EPLB effectively reduces the load imbalance -- on average, the hottest rank receives only about 0.11 times more routed tokens than the mean.
+
+| | By rank | | | By expert slot | | |
+| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| | Average | Std. Dev. | Imb. Ratio | Average | Std. Dev. | Imb. Ratio |
+| No EPLB (8 per-rank slots and EP 32) | 1024 | 491.6 | 1.564 | 128 | 164.1 | 10.948 |
+| EPLB (9 per-rank slots and EP 32) | 1024 | 52.0 | 0.109 | 114 | 77.8 | 1.792 |
+| EPLB (8 per-rank slots and EP 36) | 1024 | 53.9 | 0.115 | 128 | 87.5 | 1.791 |
+
+*Table 1: The standard deviation and imbalance ratio (average of per-layer and per-iteration metrics)*
+
+#### Online EP Load Balancer
+
+In the previous section, we demonstrated the impact of the Offline EP Load Balancer. Given our implementation of the Online EP Load Balancer, we further examine the dynamic patterns of EP balancing in online conditions.
+Let’s still use the machine translation dataset, DeepSeek R1 model, layer 36 (which is shown in Figure 1) as the example to understand the online behaviour:
+
+Figure 22: The token count sent from rank 0 to all the ranks, run on GB200, with EP32, local batch size=256, with 256 slots (no replication), so each rank hosts 8 experts
+
+From Figure 22, it is clear that starting at iteration 1963, once the EPLB has taken effect, the original hottest rank 13 is no longer the hot rank, and the workload originally sent to rank 13 has been redistributed to rank 0 and rank 1.
+
+In Figure 22, only placement adjustment has been done by the Online EPLB. If we further introduce expert replication, the balancing can be improved further, as shown in the following figure:
+
+
+
+
+
+
+
Figure 23: The token count sent from rank 0 to all the ranks, run on GB200, with EP32, local batch size=256, with 288 slots(with replication), so each rank hosts 9 experts
+
+Clearly, introducing expert replication during EPLB further improves the EP balancing.
+More elaborate experiments can be designed to observe the Online EPLB periodically taking effect during online serving to balance the EP workload dynamically, and we welcome the community to report any interesting EPLB patterns to us.
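+
+To make the replication idea more tangible, the sketch below mimics the ExpertId-to-SlotId routing table described earlier: a replicated hot expert owns several slots, and the tokens routed to it are spread across those slots (here simply round-robin), so no single rank absorbs all of its traffic. This is a hypothetical Python toy; the actual routing logic in TensorRT-LLM runs in GPU kernels.
+
+```python
+import itertools
+
+class SlotRouter:
+    """Toy ExpertId -> SlotId mapping with replication.
+
+    expert_to_slots: expert_id -> list of slot_ids; hot experts may own
+    several slots (replicas), cold experts exactly one.
+    """
+
+    def __init__(self, expert_to_slots):
+        self._cycles = {e: itertools.cycle(s) for e, s in expert_to_slots.items()}
+
+    def route(self, expert_ids):
+        """Map routed expert IDs for one batch of tokens to slot IDs,
+        spreading tokens of replicated experts across their slots."""
+        return [next(self._cycles[e]) for e in expert_ids]
+
+# Hypothetical example: hot expert 7 is replicated onto slots 56 and 263.
+router = SlotRouter({7: [56, 263], 10: [80]})
+print(router.route([7, 10, 7, 7]))  # -> [56, 80, 263, 56]
+```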
+
+### Performance study
+Note: all the representative workloads illustrated in this section come from performance traces extracted from DeepSeek R1 inference execution. The E2E performance tuning/optimization is still ongoing, and we will discuss it in future technical blogs.
+
+Let's use some representative workloads to illustrate the performance impact with large-scale EP.
+
+
+
+
+
+
Figure 24: EP impact over MoE Group GEMM and EP communication
+In Figure 24, it can be observed that increasing the EP size from 4 to 72 reduces the MoE Group GEMM computation time, while the EP communication time (Reduce/Scatter is used for EP4/EP8, All2All for EP>8) stays almost constant.
+The speed-up diminishes when the EP size increases from 18 to 32; we are working on optimizing this.
+
+Next, let's use some representative workloads to understand the performance impact with EPLB.
+
+
+
+
+
+
Figure 25: EPLB performance impact
+Figure 25 shows that EPLB brings a clear performance improvement as the EP size increases, for both the MoE GroupGEMM and EP communication times.
+
+## Reproducing steps
+To run through the reproducing steps described in this section, please use this [feature branch](https://github.com/NVIDIA/TensorRT-LLM/tree/feat/large-ep/tensorrt_llm). It will be merged into the main branch soon.
+### The effect of EP Load Balancer
+Please refer to the [EP Load Balancer example](https://github.com/NVIDIA/TensorRT-LLM/tree/feat/large-ep/examples/ep_load_balancer) for how to reproduce the results for the offline EP Load Balancer.
+
+##### Step 1: Run inference and collect statistics
+
+To generate the necessary statistics for load rebalancing, run your model on a target dataset and count the routed expert IDs during inference. Once the counting process is complete, the statistics will be saved for further processing.
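+
+Conceptually, the counting step boils down to something like the sketch below, which accumulates per-layer, per-expert token counts from the router's top-k expert indices. The function and its call site are hypothetical; in practice TensorRT-LLM records these statistics internally and dumps them to `$EXPERT_STATISTIC_PATH`, so you only need to set the environment variables below.
+
+```python
+from collections import defaultdict
+
+import torch
+
+# layer_idx -> expert_id -> routed token count, accumulated over the
+# iteration range selected by EXPERT_STATISTIC_ITER_RANGE.
+expert_counts = defaultdict(lambda: defaultdict(int))
+
+def record_routed_experts(layer_idx: int, topk_expert_ids: torch.Tensor) -> None:
+    """topk_expert_ids: [num_tokens, top_k] tensor of expert IDs chosen by
+    the MoE router for one decode iteration."""
+    ids, counts = torch.unique(topk_expert_ids, return_counts=True)
+    for expert_id, count in zip(ids.tolist(), counts.tolist()):
+        expert_counts[layer_idx][int(expert_id)] += int(count)
+```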
+
+Set up some environment variables:
+
+```bash
+export MODEL_NAME=deepseek-ai/DeepSeek-R1
+export MODEL_PATH=<YOUR_MODEL_PATH>
+# Set the expert statistic data path
+export EXPERT_STATISTIC_PATH=./expert_statistic
+# Enable counting of routed expert IDs from iteration 100 to iteration 200
+export EXPERT_STATISTIC_ITER_RANGE=100-200
+```
+
+Prepare a dataset following the [benchmarking documentation](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/performance/perf-benchmarking.md#preparing-a-dataset) and save it as `./dataset.json`.
+
+Run 32-way expert parallelism inference on the prepared dataset. Please refer to the [LLM API MGMN example](https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/llm-api/llm_mgmn_trtllm_bench.sh) for details on running `trtllm-bench` on Slurm.
+
+Create an `extra_llm_api_options.yaml` file with the LLM API options for this run and launch `trtllm-bench` with it. After inference, review the dumped statistic files in `$EXPERT_STATISTIC_PATH`, and run the `examples/ep_load_balancer/report_load_statistics.py` script to show the standard deviation and imbalance ratio metrics.
+
+##### Step 2: Generate the EPLB configuration
+
+Use the provided `examples/ep_load_balancer/generate_eplb_config.py` script to convert the collected statistics into an EPLB configuration file. Specify the target expert parallelism size (`--ep_size`) and the total number of slots (`--num_slots`) that will be used for deployment. For example, if we choose to maintain 8 expert slots per rank while increasing expert parallelism to 36 ways, there should be 32 redundant experts and 288 expert slots in total.
+
+##### Step 3: Run inference with the EPLB configuration
+
+Set up some environment variables:
+
+```bash
+# Set a new expert statistic data path
+export EXPERT_STATISTIC_PATH=./expert_statistic_eplb
+# Enable counting of routed expert IDs from iteration 100 to iteration 200
+export EXPERT_STATISTIC_ITER_RANGE=100-200
+```
+
+Run 36-way expert parallelism inference with the EPLB configuration incorporated, for example through an `extra_llm_api_options_eplb.yaml` file passed to `trtllm-bench`.
+
+> **Note:** Counting expert IDs can significantly hurt performance, so remember to disable it by unsetting `EXPERT_STATISTIC_ITER_RANGE` when running inference for benchmarking or production purposes.
+
+### Miscellaneous
+- **GB200 NUMA binding**: On GB200, GPU memory is also exposed as NUMA nodes, so the system can allocate from GPU memory as well. If you do not want that to happen, it is suggested to use `numactl -m 0,1` to bind memory allocations to the CPU NUMA nodes.
+- **Shared Memory Clean Up**: To achieve online load balancing, all expert weights are stored in shared host memory. The 4 ranks on the same GB200 node share the same expert weights to save memory. Normally, this shared host memory is cleaned up at process exit, but after an abnormal exit it may be left behind. In that case, you may need to manually check the `/dev/shm` directory and delete any `/dev/shm/moe_shared_*` files.
+
+## Expanded thoughts
+
+We deeply acknowledge the system innovation from the DeepSeek team. The introduction of large-scale EP support into their in-house inference system, and their open spirit in sharing their engineering insights with the community, are extremely valuable and have already boosted the state of inference system design.
+**Also, we want to point out that there are no magical solutions in system design and optimization, including large-scale EP.**
+Based on our current performance analysis, when you plan to apply large-scale EP, you should take the following factors into consideration:
+
+* Is the MoE GroupGEMM computation time an E2E performance bottleneck?
+ * Large-scale EP mainly helps reduce the MoE GroupGEMM execution time by reducing expert weight loading pressure and, thus, increases the compute intensity of the MoE GroupGEMM layer. For your workload setting, if the MoE GroupGEMM computation is not the bottleneck, then large-scale EP may not help much.
+* The latency constraints.
+  * Large-scale EP mostly helps when there are strict latency constraints, especially on GB200/B200 with their larger memory capacity. For GPUs with less memory capacity, or for scenarios with looser latency constraints, large-scale EP can still help, as it enables higher concurrency and better tokens/s/GPU.
+* The available HW spec.
+  * The optimal configuration for large-scale EP depends on GPU specifications (memory bandwidth and capacity, inter-GPU bandwidth, and compute power), which determine both whether to employ large-scale EP and the ideal degree of parallelism.
+* System complexity and the production deployment constraints.
+  * Without a fault-tolerance guarantee, large-scale EP can increase the online system failure ratio. Even if cluster-level coordination can route traffic to other running serving instances when certain large-scale EP serving instances fail, the large number of GPUs required for a single-instance deployment of large-scale EP can increase system-level deployment challenges.
+
+**In the future, we plan to summarize and share more of the best practices of deploying with large-scale EP techniques.**
+
+**Please use your own judgment to decide whether to adopt large-scale EP in your system and, if you do, which EP size and concrete deployment settings best suit your requirements.**
+
+The current TensorRT-LLM large-scale EP implementation is not perfect and there are still known limitations (community contributions are welcome to help us improve). For example, we need:
+
+* More platforms coverage
+ * Extending the support to cover other non-GB200 NVIDIA GPU HWs. **We are actively working on this now.**
+  * Currently, large-EP support only covers the NVFP4 data precision; incremental efforts are needed to cover FP8 and INT8/INT4 data precisions.
+* Performance
+ * Further performance tuning and optimizations. **We are actively working on this now.**
+ * More validation with workloads close to production traffic. **Here we highly welcome the community’s feedback to help us calibrate TensorRT-LLM large-scale EP implementation based on more concrete workloads.**
+ * The thorough validation of combination with other inference core features, such as dis-aggregated serving, speculative decoding, validation on more MoE model families, etc. **We are actively working on this now.**
+* Ease-of-use
+ * Easy customization
+ * We believe large-scale EP can be decomposed into at least two layers:
+      * A core layer developed by inference engine developers. This layer contains the customized EP communication kernels, the synchronization logic between CPU and GPU, and the MoE weight re-distribution logic.
+      * A strategy layer which can be co-developed by inference engine developers as well as machine learning researchers. This layer contains tools to collect online traffic statistics in different ways, plus algorithms for the optimal replication and placement of experts (a hypothetical interface sketch is shown after this list).
+    * Based on this understanding, we plan to make the components close to the strategy layer easier to extend and customize by community users. We hope this encourages better ideas to emerge.
+ * Based on user inputs of the deployment requirements (ISL/OSL, latency constraints, HW spec), we hope to be able to automatically recommend the best EP setting.
+* Fault tolerance
+  * Because a large-scale EP deployment may lead to an increased fault ratio of the online deployment system, it may also increase the need for cross-layer interactions with multiple components of the E2E LLM inference system on NVIDIA GPUs, including the low-level communication kernels and the cluster-level orchestrator and scheduler. We are actively working with various NVIDIA engineering teams to push forward on this.
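+
+As a reference point for the strategy-layer customization mentioned in the list above, here is a hypothetical interface sketch; none of these names exist in TensorRT-LLM today, they only illustrate the intended split between the core layer and the strategy layer.
+
+```python
+from typing import Dict, List, Protocol
+
+class PlacementStrategy(Protocol):
+    """Hypothetical strategy-layer hook: turns collected expert statistics
+    into a replication/placement plan that the core layer then applies."""
+
+    def compute_placement(
+        self,
+        expert_load: Dict[int, int],    # expert_id -> routed token count
+        num_ranks: int,
+        slots_per_rank: int,
+    ) -> Dict[int, List[int]]:          # expert_id -> list of slot_ids
+        ...
+```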
+
+
+We believe the current implementation can be viewed as a reasonable E2E large-scale EP implementation, and we encourage the community to try new ideas, validate performance, and share feedback to help us move fast in this area. We are actively tracking the TensorRT-LLM large-scale EP execution in [this](https://github.com/NVIDIA/TensorRT-LLM/issues/4127) GitHub issue to ensure transparency to the community.
+
+
+## Acknowledgement
+
+The large-scale EP work is another great team effort, spanning kernel-level optimizations, runtime enhancements, and systematic performance analysis and tuning. While we cannot individually acknowledge every contributor, we are proud to recognize the dedicated team of engineers whose collective expertise has helped advance the state of the art in TensorRT-LLM performance.
+Through this collaborative endeavor, we have developed valuable insights that allow us to improve GPU utilization for large language model inference. We hope that the techniques and the experience shared in this blog will help the developer community better leverage NVIDIA GPU capabilities in their mission-critical LLM inference applications.
diff --git a/latest/_sources/torch/adding_new_model.md.txt b/latest/_sources/torch/adding_new_model.md.txt
index 53f2f236ff..4ce5988c99 100644
--- a/latest/_sources/torch/adding_new_model.md.txt
+++ b/latest/_sources/torch/adding_new_model.md.txt
@@ -89,8 +89,8 @@ class MyModel(DecoderModel):
def forward(self,
attn_metadata: AttentionMetadata,
- input_ids: Optional[torch.LongTensor] = None,
- position_ids: Optional[torch.LongTensor] = None,
+ input_ids: Optional[torch.IntTensor] = None,
+ position_ids: Optional[torch.IntTensor] = None,
inputs_embeds: Optional[torch.FloatTensor] = None):
# Define the forward computation of the model
...
diff --git a/latest/advanced/disaggregated-service.html b/latest/advanced/disaggregated-service.html
index 34b99a4b6e..105eb9d30b 100644
--- a/latest/advanced/disaggregated-service.html
+++ b/latest/advanced/disaggregated-service.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -808,9 +808,9 @@ export UCX_RNDV_PIPELINE_ERROR_HANDLING=y
diff --git a/latest/advanced/executor.html b/latest/advanced/executor.html
index 7a10cd2e85..9c0d869ecf 100644
--- a/latest/advanced/executor.html
+++ b/latest/advanced/executor.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -813,9 +813,9 @@ the TensorRT-LLM C++ Executor API.
diff --git a/latest/advanced/expert-parallelism.html b/latest/advanced/expert-parallelism.html
index fbd8066e74..881c7a9b01 100644
--- a/latest/advanced/expert-parallelism.html
+++ b/latest/advanced/expert-parallelism.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -680,9 +680,9 @@
diff --git a/latest/advanced/gpt-attention.html b/latest/advanced/gpt-attention.html
index ad3ec90def..8612b7f49c 100644
--- a/latest/advanced/gpt-attention.html
+++ b/latest/advanced/gpt-attention.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -998,9 +998,9 @@ is computed as:
diff --git a/latest/advanced/gpt-runtime.html b/latest/advanced/gpt-runtime.html
index b2d8b3a4b2..07d329d497 100644
--- a/latest/advanced/gpt-runtime.html
+++ b/latest/advanced/gpt-runtime.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -1038,9 +1038,9 @@ The GptDecoder
diff --git a/latest/advanced/graph-rewriting.html b/latest/advanced/graph-rewriting.html
index 2586085da9..c31d4886b8 100644
--- a/latest/advanced/graph-rewriting.html
+++ b/latest/advanced/graph-rewriting.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -859,9 +859,9 @@ techniques to optimize the underlying graph. It provides a wrapper similar to P
diff --git a/latest/advanced/kv-cache-management.html b/latest/advanced/kv-cache-management.html
index 5f5065d0db..ed6b002c42 100644
--- a/latest/advanced/kv-cache-management.html
+++ b/latest/advanced/kv-cache-management.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -764,9 +764,9 @@ An “event” is any significant change in the lifecycle or state of a KV cache
diff --git a/latest/advanced/kv-cache-reuse.html b/latest/advanced/kv-cache-reuse.html
index 619e464383..54d0cd0631 100644
--- a/latest/advanced/kv-cache-reuse.html
+++ b/latest/advanced/kv-cache-reuse.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -737,9 +737,9 @@ Assume vocabulary size is 100, which means normal text token ids are in range [0
diff --git a/latest/advanced/lora.html b/latest/advanced/lora.html
index 81f4f71e6e..722dff5c51 100644
--- a/latest/advanced/lora.html
+++ b/latest/advanced/lora.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -851,9 +851,9 @@ The shape of LoraWe
diff --git a/latest/advanced/lowprecision-pcie-allreduce.html b/latest/advanced/lowprecision-pcie-allreduce.html
index f3b84da6f4..4d53119ac7 100644
--- a/latest/advanced/lowprecision-pcie-allreduce.html
+++ b/latest/advanced/lowprecision-pcie-allreduce.html
@@ -51,7 +51,7 @@
@@ -61,7 +61,7 @@
-
+
@@ -708,9 +708,9 @@ This feature is optimized for PCIe-based GPU topologies and may affect model acc
diff --git a/latest/advanced/speculative-decoding.html b/latest/advanced/speculative-decoding.html
index 06fcd229ad..f4c4862c8c 100644
--- a/latest/advanced/speculative-decoding.html
+++ b/latest/advanced/speculative-decoding.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -839,9 +839,9 @@ However, similar to any new model, you can follow the same approach to define yo
diff --git a/latest/advanced/weight-streaming.html b/latest/advanced/weight-streaming.html
index a6c1605c91..bac3be1c19 100644
--- a/latest/advanced/weight-streaming.html
+++ b/latest/advanced/weight-streaming.html
@@ -51,7 +51,7 @@
@@ -61,7 +61,7 @@
-
+
@@ -688,9 +688,9 @@ python3examples/summarize.py
diff --git a/latest/architecture/add-model.html b/latest/architecture/add-model.html
index 0eb4c03b31..55595d8119 100644
--- a/latest/architecture/add-model.html
+++ b/latest/architecture/add-model.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -750,9 +750,9 @@ python../summarize.py--engine_di
diff --git a/latest/architecture/checkpoint.html b/latest/architecture/checkpoint.html
index d56c1bb644..82688e25ad 100644
--- a/latest/architecture/checkpoint.html
+++ b/latest/architecture/checkpoint.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -1017,9 +1017,9 @@ trtllm-build--checkpoint_dir./op
diff --git a/latest/architecture/core-concepts.html b/latest/architecture/core-concepts.html
index 0b308ed822..9885faf6b4 100644
--- a/latest/architecture/core-concepts.html
+++ b/latest/architecture/core-concepts.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -514,22 +514,22 @@
TensorRT-LLM has a Model Definition API that can be used to define
Large Language Models. This API is built on top of the powerful
-TensorRT Python API
+TensorRT Python API
to create graph representations of deep neural networks in TensorRT. To become
familiar with the core concepts of the TensorRT API, refer to the
-Core Concepts
+Core Concepts
section of the TensorRT documentation before proceeding further.
# In tensorrt_llm.functional:
@@ -558,23 +558,23 @@ functions such as the tensorrt.ILayer
+tensorrt.ILayer
class. That graph will also be optimized by TensorRT during the compilation of
the engine, as explained in the next section.
Once populated, the instance of the
-tensorrt.INetworkDefinition,
+tensorrt.INetworkDefinition,
can be compiled into an efficient engine by the
-tensorrt.Builder
+tensorrt.Builder
In TensorRT-LLM, it is done through the build_engine member function of the
tensorrt_llm.Builder class that calls the
-build_serialized_network
+[build_serialized_network](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/infer/Core/Builder.html#tensorrt.Builder.build_serialized_network
method of the
-tensorrt.Builder
+tensorrt.Builder
object. That call, if everything works as expected, produces an instance of the
-tensorrt.IHostMemory
+tensorrt.IHostMemory
class. That object is an optimized TensorRT engine that can be stored as a
binary file.
@@ -1021,9 +1021,9 @@ srun\
diff --git a/latest/architecture/model-weights-loader.html b/latest/architecture/model-weights-loader.html
index bc27b875a5..55642a8f94 100644
--- a/latest/architecture/model-weights-loader.html
+++ b/latest/architecture/model-weights-loader.html
@@ -51,7 +51,7 @@
@@ -61,7 +61,7 @@
-
+
@@ -939,9 +939,9 @@ The support for Qwen-1 is in
diff --git a/latest/architecture/overview.html b/latest/architecture/overview.html
index 2000e1863a..627ebb10d3 100644
--- a/latest/architecture/overview.html
+++ b/latest/architecture/overview.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -668,9 +668,9 @@ Server to easily create web-based services for LLMs. TensorRT-LLM supports m
diff --git a/latest/architecture/workflow.html b/latest/architecture/workflow.html
index 98d35d1308..4ac6b3eaf7 100644
--- a/latest/architecture/workflow.html
+++ b/latest/architecture/workflow.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -847,9 +847,9 @@ The usage of this API looks like this:
diff --git a/latest/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.html b/latest/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.html
index f3c01d3f06..3651e57fa6 100644
--- a/latest/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.html
+++ b/latest/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.html
@@ -51,7 +51,7 @@
@@ -61,7 +61,7 @@
-
+
@@ -1053,9 +1053,9 @@ For more details on
H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token#
-
TensorRT-LLM evaluated on both Hopper and Ampere shows H100 FP8 is up to 4.6x max throughput and 4.4x faster 1st token latency than A100. H100 FP8 is able to achieve over 10,000 output tok/s at peak throughput for 64 concurrent requests, while maintaining a 1st token latency of 100ms. For min-latency applications, TRT-LLM H100 can achieve less than 10ms to 1st token latency.
+
TensorRT-LLM evaluated on both Hopper and Ampere shows H100 FP8 is up to 4.6x max throughput and 4.4x faster 1st token latency than A100. H100 FP8 is able to achieve over 10,000 output tok/s at peak throughput for 64 concurrent requests, while maintaining a 1st token latency of 100ms. For min-latency applications, TRT-LLM H100 can achieve less than 10ms to 1st token latency.
H200’s HBM3e larger capacity & faster memory enables up to 1.9x performance on LLMs compared to H100. Max throughput improves due to its dependence on memory capacity and bandwidth, benefitting from the new HBM3e. First token latency is compute bound for most ISLs, meaning H200 retains similar time to first token as H100.
TensorRT-LLM achieves world-record inference performance for DeepSeek-R1 on NVIDIA Blackwell GPUs, where Multi-Token Prediction (MTP) delivers a significant speedup. In our previous blog post, we discussed the key optimizations that enable the outstanding inference latency of the DeepSeek-R1 model. This article dives deeper into the implementation and optimization of MTP in TensorRT-LLM.
The development of model like DeepSeek-V3/R1, which use large-scale fine-grained Mixture-of-Experts (MoE) designs, has significantly advanced open-source model quality. Newly released open-source models such as LLaMA4 and Qwen3 also adopt a similar large-scale fine-grained MoE design principle. However, large-scale MoE models introduce new challenges for inference systems, including high memory demands and inherent expert-level workload imbalance.
The DeepSeek team has also shared their valuable experience and practice on how to optimize this kind of large-scale Expert Parallelism (EP) model, including DeepEP and EPLB. Also, the DeepSeek team has shared their concrete design considerations in this tech report. On top of those great sharings, there are also nice community efforts to implement large-scale EP in other inference engines, such as this effort from the SGLang team.
+
In this tech blog, we will introduce the details of the design and implementation to support E2E large-scale EP in TensorRT-LLM. This blog post mainly covers the following:
+
+
How to leverage NVIDIA GB200 Multi-Node NVLink (MNNVL) HW features to implement high-performance communication kernels.
+
How to design and implement an online expert workload balancer to dynamically balance the expert load distribution and adapt to the changes of online traffic patterns. We present:
+
+
The empirical data analysis demonstrating the need to do so.
+
The implementation of the online traffic data statistic module.
+
The design and implementation of the replication/placement strategy.
+
The MoE weight load/re-distributer to balance the online workload across multiple GPUs.
+
The changes needed to the MoE router and computation module to adapt to the expert load balancer needs.
+
Some preliminary data demonstrating the effectiveness of the current implementation in TensorRT-LLM.
+
+
+
+
In future tech blogs, we will also cover the following topics:
+
+
The introduction of performance tuning and optimization for TensorRT-LLM large-scale EP GB200 implementation.
+
How to implement efficient large-scale EP support for B200/Hopper and other NVIDIA GPUs without MNNVL.
+
The best practices to leverage large-scale EP and get performance gains.
+
How to combine large-scale EP with other system optimization techniques.
+
+
Even if, in this tech blog, we focus on TensorRT-LLM, we believe the core ideas and implementation can also be applied to other inference engines to help the inference performance on NVIDIA GPUs. Also, with the help of the community, we would like to figure out how to better modularize the current TensorRT-LLM large-scale EP implementation and make it more easily reusable by the community.
+
Finally, in this tech blog, there are implementation details which are targeted towards the GB200 system, such as the communication components leveraging the GB200 MNNVL inter-GPU connection, and the MoE weight load/re-distributer module leveraging the high bandwidth C2C connection between Grace CPU and Blackwell GPU. Nevertheless, the overall design principle and software architecture can still apply to non-GB200 NVIDIA GPU systems. To facilitate the extension to other non-GB200 system, we have, on purpose, paid attention to the generalization of the design and implementation. These changes should be easily composable with other existing components.
The main motivation of introducing large-scale EP (here means EP > 8) comes from the following system considerations:
+
+
We expect to reduce the execution latency thanks to the increased aggregated memory bandwidth to load the expert weights.
+
We expect to increase the effective batch size to saturate the GPU computing power.
+
+
Note that when the E2E execution time is dominated by the MoE GroupGEMM computation, by introducing large-scale EP, it is expected to see clear performance benefits. But if the E2E execution time is not dominated by the MoE GroupGEMM computation, then large-scale EP may bring limited performance benefit.
+
Also there isn’t free lunch in the system design. When the EP size increases up to greater than 8 (sometimes even less than 8), due to the sparsity execution nature of MoE models, it can inherently trigger the EP-level workload imbalance issue.
+
And here are some empirical observations based on some datasets (all the analyses below are done with the DeepSeek R1 model, on 32 GB200 GPUs).
+
+
Observations over one machine translation dataset#
+
Firstly let’s have an overview of the overall imbalance issues across layers:
+
+
+
+
+
+
Figure 1: The routed token count from rank 0 to all the ranks(including rank 0), for decode iteration 1950, and all the MoE layers
+
In Figure 1, it can be seen clearly that for the MoE in layer 36, many more tokens are sent from rank 0 to rank 13.
+
If we zoom on the MoE in the layer 36 and record its activated expert rank distribution, there clearly is a rank that is more heavily activated:
+
+
+
+
+
+
Figure 2: The tokens received for each expert rank for layer 36
+
If we flatten the data to see the routed tokens for each expert, we can see that a few experts are more active than others:
+
+
+
+
+
+
Figure 3: The tokens received for each expert for layer 36
+
It is also interesting to see that this kind of imbalance issue is very stable across multiple iterations, as shown on the following figure:
+
+
+
+
+
+
Figure 4: The accumulated token counts received for each expert for layer 36, within 50 decode steps, and the local batch size=256.
+
Clearly, the hot experts in Figure 4 are actually the same as in Figure 3 which only have data for a single decode iteration.
+We have also done the duration-based analysis for local batch size=1 which correspond to a single request with observing the similar pattern:
+
+
+
+
+
+
Figure 5: The accumulated token counts received for each expert for layer 36, within 400 decode iterations, and the local batch size \= 1\.
+
To conclude the findings from this study over this machine translation dataset, we could say that:
+
+
There are hot spots in some layers where the workload of some EP ranks can be much higher than others.
+
This may be caused by the hottest expert or some hot experts to be located on the same rank.
+
The routed token distributions can be the same for tens to hundreds of iteration steps or even more.
+
For the execution of a single request, it also has the same hot experts between steps.
+
+
And another natural question is whether the above observation can change significantly on other datasets. So we have done a similar analysis with the GSM8K dataset.
Figure 6: The routed token count from rank 0 to all the ranks, for iteration 1950, and all the MoE layers
+
In Figure 6, compared with Figure 1, it can be seen that for GSM8K, the hot layer becomes layer 57 instead of layer 36. Then what about the concrete status of layer 36 for the GSM8K dataset?
+
+
+
+
+
+
Figure 7: routed token counts from EP rank 0 to other EP ranks, still taking the iteration 1950, MoE layer 36 as the example
+
Clearly from Figure 7, it can be observed that the workload imbalance is different from what was observed for the different dataset (in Figure 2).
+Based on Figure 8, it can be observed that the workload imbalance is relatively stable across multiple iterations on the GSM8K dataset too. It is the same as the previous machine translation dataset.
+
+
+
+
+
+
Figure 8: The accumulated token counts sent from EP Rank 0 to all the ranks, for MoE layer 57 within 50 decode steps, local batch size=256
+
If we flatten the EP rank level data to expert-level data, we can have the following figure.
+
+
+
+
+
+
Figure 9: The accumulated token counts received for each expert for layer 57, within 50 decode steps, and the local batch size=256.
+
The similar imbalance pattern also exists for a single request.
+
+
+
+
+
+
Figure 10: The accumulated token counts received for each expert for layer 57, within 400 decode steps, for a single request
+
If we use another request, then we can still observe the expert imbalance issue, while the hot experts can be different with some in common (in this example it is expert 10).
+
+
+
+
+
+
Figure 11: The accumulated token counts received for each expert for layer 57, within 400 decode steps, for a single request
+
So combining the data analysis of two datasets, we have the following findings:
+
+
EP level workload imbalance issue is common for large-scale EP inference on multiple datasets. And the EP imbalance severity can be different per layer. Also the EP imbalance issue is dataset sensitive.
+
The EP rank level imbalance issue can be caused by a certain hottest expert or multiple hot experts staying on the same EP rank.
+
The EP rank imbalance distribution is relatively stable across tens to hundreds of iterations.
+
Though there is time-dimension stability of EP rank imbalance distribution, clearly different requests can have different EP imbalance distribution.
+
+
Based on these findings, they can lead to our design consideration of TensorRT-LLM’s large-scale EP implementation:
+
+
By design the EP imbalance issue needs to be considered to assure great E2E performance.
+
Online EP Load Balancer(rather than only a Offline EP Load Balancer implementation) based on the real-time online request traffic is essential to ensure the robustness of EP balancer.
+
The time-dimension stability of EP rank imbalance distribution can be leveraged to re-distribute the MoE weights to different EP ranks in an efficient manner.
+
+
In the next section we will illustrate the high-level design.
Based on the detailed analysis and study in section Motivation of large-scale EP, it can clearly be observed that expert imbalance in EP is a common pattern for large-scale EP. This EP imbalance can clearly impede the overall system performance in the following ways:
+
+
The hot EP rank will consume more memory (for activations) which can limit the effective max batch size scheduled during the inference process.
+
More data will be sent to/received from the hot EP rank.
+
+
Those issues can clearly result into a system-level congestion effect in which the hot EP rank will delay the overall E2E execution.
+
To make sure large-scale EP can run well, careful considerations are needed to minimize the EP imbalance issue. The overall design is as follows:
+
+
+
+
+
+
Figure 12: the high-level design of TensorRT-LLM large-scale EP
+
In this design, there are both CPU and GPU side logics:
+
+
CPU side
+
+
Implement the Replication & Placement algorithms (Replication & Placement Compute component) to achieve a more balanced EP strategy. Those are rather classical algorithms for which CPU computation is more suitable. Furthermore, by offloading this computation to the CPU, the interference with the GPU can be reduced. In the future, machine-learning based algorithms may also be explored and additional design consideration may be needed. The Replication & Placement Compute component will generate the “Placement Info” which will then be consumed by both the GPU Routing logic and the CPU Update Weights & Placement component. The Replication & Placement Compute component will consume the Statistics Data generated by the Statistics component which runs on the GPU.
+
Orchestrate the process (Update Weights & Placement component) to update and reload the MoE weights from CPU host memory to GPU device memory. This component will also consume the Placement Info generated by the Replication & Placement Compute component. Our scalable design allows us to reload the MoE weights from remote GPU memory via MNNVL or NIC.
+
+
+
GPU side
+
+
This is the main execution workflow of inference. The following new GPU components are introduced with our design:
+
+
EP communication kernels. In Figure 11, those are the Dispatch and Combine components.
+
Online traffic data statistics collector (the Statistics component). This component collects the Statistics Data which is to be consumed by the Replication & Placement Compute component.
+
The MoE router logic (the Routing component). It sends tokens to the activated experts. It needs to be adjusted to support the dynamic placement of MoE weights. It also consumes the Placement Info generated by the Replication & Placement Compute component.
+
The MoE computation logic (the MoE component) also needs to be adjusted correspondingly.
+
+
+
+
+
Careful synchronization between CPU and GPU components is needed to ensure the validity of the entire execution process ; particularly, to avoid hangs, as well as invalid or sub-optimal executions.
+
+
For the Update Weights & Placement component, we identified two design choices:
+
+
Bulk approach
+
+
In this approach, when the MoE weight redistribution logic starts, the inference taking place on the current serving instance will have to be paused until the MoE weight redistribution process finishes. We estimate that it can lead to approximately 0.5 ~1 second online serving stalls ; causing in the worst-cases request timeouts. This kind of timeout or stalls can be mitigated at the system level by routing the requests to other serving instances or just request replays.
+
+
+
Layer-wise approach
+
+
In this approach, the MoE weight redistribution is done layer by layer such that at each decode iteration only certain layers (it can be configured) will be impacted by a redistribution of their MoE weights. With this design, it will take several iterations to re-balance the MoE weights of all the layers. We expect this approach to have almost no impact on the user experience.
+
+
+
+
+
+
+
+
+
Figure 13: One example of the layer-wise MoE weight re-distribution
+
In our current system, we choose to implement the layer-wise approach to minimize the impact on the online user experience. The bulk approach should be much easier to implement and we will not discuss it in this tech blog.
+To implement the layer-wise approach properly, we need to carefully evaluate the capability of different underlying HWs to decide on the concrete implementation.
+Let’s use GB200 as an example. In Figure 14, we illustrate the communication bandwidth of different HW elements in a GB200 node.
+
+
+
+
+
+
Figure 14: high-level topology of GB200 system
+
Using the DeepSeek R1 model as an example, with FP4 precision, each MoE expert occupies 24MiB of memory space. There are 256 experts per layer. In total, that’s 58 MoE layers, plus 1 MTP layer. So the maximum amount of MoE weights which need to be redistributed, to achieve EP balance, is 348GiB.
+One GB200 node has 480GB LPDDR5X memory for each Grace CPU. In total, that’s 960GB of host memory across a NUMA domain. One GB200 node can host the entire MoE weights of a model like the DeepSeek R1 LLM in its CPU host memory. Based on it, the MoE weight redistribution can be done by moving the corresponding MoE weights from CPU host memory to GPU device memory.
+
Let’s assume that we target 50ms inter-token-latency (ITL) as our main latency constraint. Using back-of-the-envelope calculation, it can be computed that the amount of expert weights which can be moved from the MoE weight pool (can be kept in Grace CPU memory or GPU memory on another node) to the Blackwell GPU (to do the real MoE inference) for each decode iteration is:
+
+
+
+
+
+
Figure 15: The theoretical expert count to be updated for each iteration with following 50ms ITL constraints, by using different HW as pools to store the full MoE weight
+
Based on this analysis, and, if we rely on the Grace CPU memory on each node to store the MoE weight pool, for each decode iteration, the weights of up to 300 experts can be redistributed to each GPU on the same GB200 node.
+Assuming our goal is to finish the MoE weight re-balancing for the full model within 5 decode iterations, here are some more concrete use-case studies:
+
+
Use-case 1 (with balanced expert placement and no expert replication)
+
+
64 GPUs with 4 Experts per GPU
+
58 layers, 232 Experts per GPU
+
Need 47 Expert Update / Iter, all the methods can satisfy the latency goal.
+
+
+
Use-case 2 (with both balanced expert placement and replication)
+
+
64 GPUs or 72 GPUs with 5 Experts per GPU
+
58 layers, 290 Experts per GPU
+
Need 58 Expert Update / Iter, all the methods can satisfy the latency goal.
+
+
+
Use-case 3 (with both balanced expert placement and replication)
+
+
36 GPUs with 8 Experts per GPU
+
58 layers, 464 Experts per GPU
+
Need 93 Expert Update / Iter, all the method can satisfy the latency goal.
+
+
+
+
In summary, based on the theoretical analysis, using Grace CPU memory as the pool to hold the full size MoE weights should allow us to achieve the EP (Expert-Parallelism) re-balancing within 5 decode iterations. If we relax the requirements to 10 or more iterations, there can be even more system implementation flexibility.
+
Next we will introduce the implementation details of our large-scale EP system.
We have evaluated multiple ways of implementing the EP communication kernels needed by large-scale EP, including DeepEP, other solutions and the development of an approach from scratch.
For non-GB200 systems (such as B200 or Hopper), we chose to integrate DeepEP directly, with some potential enhancement.
+
+
The considerations are:
+
+
DeepEP is a great piece of work done by the DeepSeek team. When we started the TensorRT-LLM large-scale EP efforts, our first focus was on GB200. We chose to implement our own custom EP communication kernels as it was easier to introduce optimizations requiring the GB200 MNNVL capability. Also, based on our current evaluation, DeepEP does not provide CUDA graph compatibility for all the scenarios. We believe that CUDA graph is needed for the scenario we are interested in.
+
When we started the efforts to enable large-scale EP on Hopper, we concluded that DeepEP could be adapted and meet our needs on this platform. We plan to extend DeepEP to work for B200 in the future.
+
+
We are also actively evaluating the possibility of consolidating GB200 and non-GB200 EP communication kernels into a single solution to make the system simpler, and we will keep the community posted on the status.
+Now let’s talk a little bit more about the optimizations introduced into the custom EP communication kernel implementations.
In the Decoding Phase with Prefill-Decoding (PD) separation, we observed that the batch size may not be very large, such that latency is a significant concern. In this context, compatibility with CUDA Graph is a strong requirement.
+NCCL is a great GPU communication library which provides highly efficient communication kernels and primitives.
+For now, its Send and Recv operations require the data size to be explicitly specified when invoking with ncclSend/ncclRecv.
+However, in large expert parallel (large-EP) scenarios, the data size to be transferred is determined dynamically based on the model’s output at each iteration.
+With the current NCCL’s communication interface, an explicit synchronization is required to send the communication size back to the CPU and launch NCCL calls from the CPU with the corresponding data size. This would break CUDA Graph compatibility.
+This limitation has forced us to develop high performance communication kernels compatible with CUDA graph and that can accept communication sizes directly from GPU memory.
+We also wanted those kernels, for GB200, to take of advantage of the MNNVL’s memory bandwidth.
Our kernels adopt a communication approach similar to NCCL’s LL128 primitive. As this approach strikes a good balance between latency and bandwidth, it is well-suited for LLM inference.
+Our custom kernels can read the communication size directly from GPU memory and are compatible with CUDA Graph even when the data size varies across runs.
+
In our implementation, we use the CUDA’s Driver API to establish a peer-to-peer (P2P) buffer via MNNVL as a workspace.
+Each GPU can access the workspace of other GPUs. The workspace is divided into multiple channels, each assigned to a remote GPU as a write buffer.
+Those write buffers are used in a FIFO manner, with flags used to synchronize FIFO status and avoid data corruption.
+More details can be found in PR 3504.
The Python interface layer provides a user-friendly PyTorch/Python native interface to access the MoE Load Balancing implementations, such as the Python wrapper for the GPU/CPU synchronization logics and the online data statistics collection, and other logics implemented in 4.2 to 4.4.
For production deployment needs, Online EP Load Balancer is recommended since it can adapt itself to the change in the online traffic pattern, dynamically, thus with more performance guarantees.
+
However, the Online EP Load Balancer faces several challenges.
+
First, load balancing introduces dynamic Expert placement. A single Expert’s location may shift based on current workload. For example, if Expert 0 and Expert 1, originally assigned to Rank 0, both become hot experts, the load balancing policy might redistribute them to different ranks alongside cold experts, which necessitates timely updates to the weight data.
+
We aim for the Online Load Balancer to react swiftly to changes in request patterns and adjust Expert assignments to avoid load imbalance issues. Importantly, we do not want the balancing process to interfere with the online inference execution process, nor do we want to employ a “Stop-The-World” (Bulk) strategy for updating weights.
+
In large MoE models (such as DeepSeek R1) during the decoding phase, batch sizes are often small, making CUDA Graph an effective acceleration method; especially when high TPS per user is required. This benefit is even more pronounced on platforms like GB200. For this reason, we want the entire load balancing mechanism to be compatible with CUDA Graph.
+
To avoid invalidating pre-captured CUDA Graphs, we perform in-place weight updates by writing new Expert weights into the same memory locations, rather than swapping out tensor pointers. This ensures the weights tensor address remains unchanged in the Model Engine.
+
In this design, each Expert Slot serves as a container for holding an Expert’s weights, decoupled from any specific Expert. The number of Expert Slots must be greater than or equal to the total number of Experts so that each Expert always has at least one available Slot. Hot Experts may occupy multiple Slots. Each Slot is identified by a SlotId.
+
Since the MoE model’s routing logic outputs ExpertIds (not SlotIds), we maintain a routing table from ExpertId to SlotId which is updated by the load balancing policy, periodically. The Load Balancer Routing module uses the current routing table (Expert replication information and slots) to map each token to a suitable Expert Slot.
+
To make weight updates non-blocking and avoid “Stop-The-World”, we use a layer-wise update approach. After a layer’s forward pass completes and before its next forward pass starts, we perform the weight balancing for that layer; the next forward pass for the same layer should wait until the last update is done if it happens at this iteration.
+
As the forward execution is typically driven by a single Python thread invoking a sequence of PyTorch operations, we offload the weight update routine to a background C++ thread. The Python side only initializes the Expert Slots and registers Expert Weights in shared host memory.
+
During forward execution, we insert lightweight lock/unlock kernels before and after MoE computations, as well as kernels for collecting statistics and assigning SlotIds to ExpertIds. These kernels must be short and overlap-friendly to minimize performance impact. As long as the CPU weights update thread can finish its work on time, the lock/unlock will be very short. All, except for the routing kernel, are lightweight and can easily overlap with forward kernels in different CUDA streams; the routing kernel is the primary optimization focus.
+
On GB200, we utilize MNNVL for inter-GPU communication during Expert dispatch and combine. Expert weights reside in host memory and are brought into GPU memory via C2C to support asynchronous updates. A multi-threaded Host Copy Engine manages this process, auto-detecting NUMA topology and choosing optimal CPU cores, enabling full asynchrony with model forward passes.
+
On servers without C2C but with PCIe, if cross-node communication is required, network and weight updates may compete for PCIe bandwidth, requiring additional tuning and design consideration. We have not implemented the copy engine for PCIe servers yet and it is in list of future tasks.
Online EP balancer is more suitable for production deployment needs as it reacts in a timely manner to online traffic changes. However, Offline EP Balancer provides a lightweight way for performance study/debugging and validation. You can refer to this PR to learn more about the implementation of the Offline EP Load Balancer. Also there is a tool provided to collect statistics about the expert activation distribution which can be used as the input to deduce the EP balancing placement strategy. You can refer to this doc to learn more details as well as how to run through the Offline EP Load Balancer in an E2E manner.
As shown by Figure 1, on the machine translation dataset, MoE layer 36 suffers from extreme expert load imbalance issues, so we use that layer to illustrate the effect of EP Load Balancer. We still run DeepSeek-R1 with 32-way expert parallelism on 32 GB200 GPUs.
+
+
+
+
+
+
Figure 16: The routed token count by receiving ranks (x-axis) and iterations (y-axis) at layer 36 (No EPLB)
+
+
+
+
+
+
Figure 17: The routed token count by experts (x-axis) and iterations (y-axis) at layer 36 (No EPLB)
+
Figure 16 displays the routed token count by receiving ranks over 50 iterations, which could represent the workload for each rank. Rank 13 receives significantly more tokens than all other ranks, and such an imbalanced workload distribution is almost constant over iterations. Figure 17 breaks down the workload to experts. Clearly, two hot experts on rank 13 cause the excessive pressure on this rank.
+
With the above statistics, we can perform offline EPLB. One potential strategy is to maintain the 32-way expert parallelism while increasing expert slots from 8 to 9 per rank. This results in 32 redundant experts and 288 expert slots in total. Figures 18 and 19 show the routed token count after EPLB. Clearly, the per-rank token distribution is much more balanced, and there are no hot experts anymore.
+
+
+
+
+
+
Figure 18: The routed token count by receiving ranks (x-axis) and iterations (y-axis) at layer 36 (EPLB with 9 per-rank slots and EP 32)
+
+
+
+
+
+
Figure 19: The routed token count by experts (x-axis) and iterations (y-axis) at layer 36 (EPLB with 9 per-rank slots and EP 32)
+
Another EPLB strategy is to maintain 8 expert slots per rank while increasing expert parallelism to 36 ways. This strategy also results in 32 redundant experts and 288 expert slots in total. As displayed by Figures 20 and 21, the workloads also become balanced across ranks or expert slots.
+
+
+
+
+
+
Figure 20: The routed token count by receiving ranks (x-axis) and iterations (y-axis) at layer 36 (EPLB with 8 per-rank slots and EP 36)
+
+
+
+
+
+
Figure 21: The routed token count by experts (x-axis) and iterations (y-axis) at layer 36 (EPLB with 8 per-rank slots and EP 36)
+
For each layer and iteration, the load imbalance can be measured using simple metrics such as the standard deviation or the imbalance ratio. Given the routed token counts for all ranks (or experts), the imbalance ratio is defined as $(max - mean) / mean$, which represents the excessive workload received by the hottest rank (or expert). A perfectly balanced load would have an imbalance ratio of 0.
+
+Table 1 reports the standard deviation and imbalance ratio for the aforementioned cases. Each number is averaged over the per-layer, per-iteration metrics. Without EPLB, the load imbalance is significant: on average, the hottest rank receives 1.56 times more routed tokens than the mean. EPLB effectively reduces the load imbalance: on average, the hottest rank receives only about 0.11 times more routed tokens than the mean.
+
+| | By rank | | | By expert slot | | |
+| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| | Average | Std. Dev. | Imb. Ratio | Average | Std. Dev. | Imb. Ratio |
+| No EPLB (8 per-rank slots and EP 32) | 1024 | 491.6 | 1.564 | 128 | 164.1 | 10.948 |
+| EPLB (9 per-rank slots and EP 32) | 1024 | 52.0 | 0.109 | 114 | 77.8 | 1.792 |
+| EPLB (8 per-rank slots and EP 36) | 1024 | 53.9 | 0.115 | 128 | 87.5 | 1.791 |
+
Table 1: The standard deviation and imbalance ratio (average of per-layer and per-iteration metrics)
In the previous section, we demonstrated the impact of the Offline EP Load Balancer. Given our implementation of the Online EP Load Balancer, we further examine the dynamic patterns of EP balancing in online conditions.
+Let’s still use the machine translation dataset, DeepSeek R1 model, layer 36 (which is shown in Figure 1) as the example to understand the online behaviour:
+
+
+
+
+
+
Figure 22: The token count sent from rank 0 to all the ranks, run on GB200, with EP32, local batch size=256, with 256 slots(no replication), so each rank hosts 8 experts
+
From Figure 22, it is clear that from iteration 1963, since the EPLB has taken into effect, the original hottest rank 13 is no longer the hot rank and the original workload sent to rank 13 has been redistributed to rank 0 and rank 1.
+
In Figure 22, only placement adjustment has been done by the Online EPLB. If we further introduce expert replication, the balancing can be further improved, as shown on the following figure:
+
+
+
+
+
+
Figure 23: The token count sent from rank 0 to all the ranks, run on GB200, with EP32, local batch size=256, with 288 slots(with replication), so each rank hosts 9 experts
+
Clearly, by introducing expert replication when doing the EPLB, the EP balancing can be further improved.
+Further complicated experiments can be designed to observe the Online EPLB taking into effect periodically during the online serving process to balance the EP workload in a dynamic way and we welcome the community to report any interesting EPLB pattern observation to us.
Note: all the representative workloads illustrated in this section are from the performance traces extracted from DeepSeek R1 inference execution. The E2E performance tuning/optimization is still ongoing and we will discuss them in the future technical blogs.
+
Let’s use some representative workloads to illustrate the performance impact with large-scale EP.
+
+
+
+
+
+
Figure 24: EP impact over MoE Group GEMM and EP communication
+In Figure 24, it can be observed that by increasing the EP size from 4 to 72, the MoE Group GEMM computation time gets reduced, while the EP communication time (for EP4/EP8 Reduce/Scatter is used, while for EP>8 All2All is used) stays almost constant.
+When the EP size increases from 18 to 32, the speed-up diminishes. We are working on optimizing it.
+
Next, let’s use some representative workloads to understand the performance impact with EPLB.
+
+
+
+
+
+
Figure 25: EPLB performance impact
+Clearly in Figure 25, we can see that EPLB brings a clear performance improvement when the EP size increases, for both MoE GroupGEMM and EP communication times.
+
+
+
+
To generate the necessary statistics for load rebalancing, run your model on a target dataset and count the routed expert IDs during inference. Once the counting process is complete, the statistics will be saved for further processing.
+
Set up some environment variables:
+
export MODEL_NAME=deepseek-ai/DeepSeek-R1
+export MODEL_PATH=<YOUR_MODEL_PATH>
+# Set the expert statistic data path
+export EXPERT_STATISTIC_PATH=./expert_statistic
+# Enable counting of routed expert IDs from iteration 100 to iteration 200
+export EXPERT_STATISTIC_ITER_RANGE=100-200
+
Run 32-way expert parallelism inference on the prepared dataset. Please refer to the LLM API MGMN example for details on running trtllm-bench on Slurm.
After inference, review the dumped statistic files in $EXPERT_STATISTIC_PATH. Run the examples/ep_load_balancer/report_load_statistics.py script to show the standard deviation and imbalance ratio metrics:
Use the provided examples/ep_load_balancer/generate_eplb_config.py script to convert the collected statistics into an EPLB configuration file. Specify the target expert parallelism size (--ep_size) and the total number of slots (--num_slots) that will be used for deployment. For example, if we choose to maintain 8 expert slots per rank while increasing expert parallelism to 36 ways, there should be 32 redundant experts and 288 expert slots in total.
Step 3: Run inference with the EPLB configuration#
+
Set up some environment variables:
+
# Set a new expert statistic data path
+export EXPERT_STATISTIC_PATH=./expert_statistic_eplb
+# Enable counting of routed expert IDs from iteration 100 to iteration 200
+export EXPERT_STATISTIC_ITER_RANGE=100-200
+
+
+
Run 36-way expert parallelism inference with the EPLB configuration incorporated:
Note: Counting expert IDs can significantly hurt performance, so remember to disable it by unsetting EXPERT_STATISTIC_ITER_RANGE when running inference for benchmarking or production purposes.
GB200 NUMA binding: On GB200, GPU memory is also exposed as NUMA nodes, so the system can allocate from GPU memory as well. If you do not want that to happen, it is suggested to use numactl -m 0,1 to bind memory allocations to the CPU NUMA nodes.
+
Shared Memory Clean Up: To achieve online load balance, all expert weights are stored in shared host memory. 4 ranks on same GB200 node share the same expert weights to save memory. Normally, these shared host memory will be cleaned up at process exit. But if an abnormal exit happens, they may not get chance to be cleaned. In that case, you may need to manually check /dev/shm directory and delete /dev/shm/moe_shared_* if any.
We deeply acknowledge the system innovation from the DeepSeek team. The introduction of the large-scale EP support into their in-house inference system and their open spirit of sharing their engineering insights with the community is extremely valuable and has already boost the performance of inference system design.
+Also we want to point out that there are no magical solutions when doing system design and optimization, such as large-scale EP.
+Based on our current performance analysis, when you plan to apply large-scale EP, you should take the following factors into considerations:
+
+
Is the MoE GroupGEMM computation time an E2E performance bottleneck?
+
+
Large-scale EP mainly helps reduce the MoE GroupGEMM execution time by reducing expert weight loading pressure and, thus, increases the compute intensity of the MoE GroupGEMM layer. For your workload setting, if the MoE GroupGEMM computation is not the bottleneck, then large-scale EP may not help much.
+
+
+
The latency constraints.
+
+
Large-scale EP mostly helps when there are strict latency constraints, especially on GB200/B200 with more memory capacity. For GPUs with less memory capacity, for scenarios with less latency constraints, large-scale EP can still help as it helps achieve higher concurrency and better tokens/s/GPU.
+
+
+
The available HW spec.
+
+
The optimal configuration for large-scale EP depends on GPU specifications - including memory bandwidth, capacity, inter-GPU bandwidth, and compute power - which determine both whether to employ large-scale EP and the ideal degree of parallelism.
+
+
+
System complexity and the production deployment constraints.
+
+
Without fault tolerance guarantee, large-scale EP can increase the online system failure ratio. Even if it is possible to do cluster level coordination to route the traffic to other running serving instances when certain large-scale EP serving instances fail, the large number of GPUs required for a single-instance deployment of large-scale EP can increase system level deployment challenges.
+
+
+
+
In the future, we plan to summarize and share more of the best practices of deploying with large-scale EP techniques.
+
Please use your own judgement to decide whether to use large-scale EP into your system or not, and when you use it, what is the suitable EP size and concrete deployment settings suitable for your own requirements.
+
The current TensorRT-LLM large-scale EP implementation is not perfect and there are still known limitations (community contributions are welcome to help us improve). For example, we need:
+
+
More platforms coverage
+
+
Extending the support to cover other non-GB200 NVIDIA GPU HWs. We are actively working on this now.
+
Currently the large-EP support only covers NVFP4 data precision, incremental efforts are needed to cover FP8 and INT8/INT4 data precision.
+
+
+
Performance
+
+
Further performance tuning and optimizations. We are actively working on this now.
+
More validation with workloads close to production traffic. We highly welcome the community's feedback to help us calibrate the TensorRT-LLM large-scale EP implementation based on more concrete workloads.
+
Thorough validation in combination with other core inference features, such as disaggregated serving and speculative decoding, as well as validation on more MoE model families. We are actively working on this now.
+
+
+
Ease-of-use
+
+
Easy customization
+
+
We believe large-scale EP can be decomposed into at least two layers:
+
+
A core layer developed by inference engine developers. This layer contains the customized EP communication kernels, the synchronization logic between CPU and GPU, and the MoE weight re-distribution logic.
+
A strategy layer that can be co-developed by inference engine developers and machine learning researchers. This part contains tools to collect online traffic statistics with different approaches, and algorithms for the optimal replication and placement of experts.
+
+
+
Based on this understanding, we plan to make the components close to the strategy layer easier to extend and customize by community users (a hypothetical sketch of such a strategy-layer interface is shown after this list). We hope this encourages better ideas to emerge.
+
+
+
Based on user-provided deployment requirements (ISL/OSL, latency constraints, HW spec), we hope to be able to automatically recommend the best EP setting.
+
+
+
Fault tolerance
+
+
Because a large-scale EP deployment may lead to an increased fault ratio of the online deployment system, it increases the need for cross-layer interactions with multiple components of the E2E LLM inference system on NVIDIA GPUs, including the low-level communication kernels, the cluster-level orchestrator and scheduler, etc. We are actively working with various NVIDIA engineering teams to push forward on this.
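Returning to the customization point above, the following is a hypothetical Python sketch of what a user-extensible strategy-layer interface could look like. The class and method names are illustrative assumptions and are not part of the current TensorRT-LLM API.

```python
# Hypothetical strategy-layer interface for expert replication and placement.
# Names and signatures are assumptions for illustration only.
from abc import ABC, abstractmethod
from typing import List


class ExpertPlacementStrategy(ABC):
    """Decides how experts are replicated and placed across EP ranks."""

    @abstractmethod
    def update_statistics(self, tokens_per_expert: List[int]) -> None:
        """Consume per-expert routed-token counts collected from online traffic."""

    @abstractmethod
    def compute_placement(self, slots_per_rank: int, ep_size: int) -> List[List[int]]:
        """Return, for each rank, the list of expert IDs it should host."""


class GreedySpreadStrategy(ExpertPlacementStrategy):
    """Toy policy: spread the hottest experts across ranks in round-robin order."""

    def __init__(self, num_experts: int):
        self._load = [0] * num_experts

    def update_statistics(self, tokens_per_expert: List[int]) -> None:
        for expert_id, count in enumerate(tokens_per_expert):
            self._load[expert_id] += count

    def compute_placement(self, slots_per_rank: int, ep_size: int) -> List[List[int]]:
        hottest_first = sorted(range(len(self._load)),
                               key=lambda e: self._load[e], reverse=True)
        placement: List[List[int]] = [[] for _ in range(ep_size)]
        for i, expert_id in enumerate(hottest_first[:slots_per_rank * ep_size]):
            placement[i % ep_size].append(expert_id)
        return placement
```

In such a split, the core layer would remain responsible for re-distributing the weights and routing tokens according to whatever placement a strategy returns, so researchers could experiment with placement and replication algorithms without touching the communication kernels.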
+
+
+
+
We believe the current implementation can be viewed as a reasonable E2E large-scale EP implementation, and we encourage the community to try new ideas and performance validation on top of it, and to share feedback to help us move fast in this area. We are actively tracking the TensorRT-LLM large-scale EP execution in this GitHub issue to ensure transparency to the community.
The large-scale EP work is another great team effort, spanning kernel-level optimizations, runtime enhancements, and systematic performance analysis and tuning. While we cannot individually acknowledge every contributor, we are proud to recognize the dedicated team of engineers whose collective expertise has helped advance the state of the art of performance in TensorRT-LLM.
+Through this collaborative endeavor, we have developed valuable insights that allow us to improve GPU utilization for large language model inference. We hope that the techniques and experience shared in this blog will help the developer community better leverage NVIDIA GPU capabilities in their mission-critical LLM inference applications.
\ No newline at end of file
diff --git a/latest/commands/trtllm-build.html b/latest/commands/trtllm-build.html
index 7748f9223a..daf4a71064 100644
--- a/latest/commands/trtllm-build.html
+++ b/latest/commands/trtllm-build.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -1056,9 +1056,9 @@
diff --git a/latest/commands/trtllm-serve.html b/latest/commands/trtllm-serve.html
index 681eef74b6..0d8aba6941 100644
--- a/latest/commands/trtllm-serve.html
+++ b/latest/commands/trtllm-serve.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -750,6 +750,12 @@ However, for the PyTorch backend, specified with the
+